qemu/hw/i386/pc.c
<<
>>
Prefs
   1/*
   2 * QEMU PC System Emulator
   3 *
   4 * Copyright (c) 2003-2004 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "qemu/units.h"
  27#include "hw/hw.h"
  28#include "hw/i386/pc.h"
  29#include "hw/char/serial.h"
  30#include "hw/char/parallel.h"
  31#include "hw/i386/apic.h"
  32#include "hw/i386/topology.h"
  33#include "hw/i386/fw_cfg.h"
  34#include "sysemu/cpus.h"
  35#include "hw/block/fdc.h"
  36#include "hw/ide.h"
  37#include "hw/pci/pci.h"
  38#include "hw/pci/pci_bus.h"
  39#include "hw/nvram/fw_cfg.h"
  40#include "hw/timer/hpet.h"
  41#include "hw/firmware/smbios.h"
  42#include "hw/loader.h"
  43#include "elf.h"
  44#include "multiboot.h"
  45#include "hw/timer/mc146818rtc.h"
  46#include "hw/dma/i8257.h"
  47#include "hw/timer/i8254.h"
  48#include "hw/input/i8042.h"
  49#include "hw/audio/pcspk.h"
  50#include "hw/pci/msi.h"
  51#include "hw/sysbus.h"
  52#include "sysemu/sysemu.h"
  53#include "sysemu/tcg.h"
  54#include "sysemu/numa.h"
  55#include "sysemu/kvm.h"
  56#include "sysemu/qtest.h"
  57#include "kvm_i386.h"
  58#include "hw/xen/xen.h"
  59#include "hw/xen/start_info.h"
  60#include "ui/qemu-spice.h"
  61#include "exec/memory.h"
  62#include "exec/address-spaces.h"
  63#include "sysemu/arch_init.h"
  64#include "qemu/bitmap.h"
  65#include "qemu/config-file.h"
  66#include "qemu/error-report.h"
  67#include "qemu/option.h"
  68#include "hw/acpi/acpi.h"
  69#include "hw/acpi/cpu_hotplug.h"
  70#include "hw/boards.h"
  71#include "acpi-build.h"
  72#include "hw/mem/pc-dimm.h"
  73#include "qapi/error.h"
  74#include "qapi/qapi-visit-common.h"
  75#include "qapi/visitor.h"
  76#include "qom/cpu.h"
  77#include "hw/nmi.h"
  78#include "hw/usb.h"
  79#include "hw/i386/intel_iommu.h"
  80#include "hw/net/ne2000-isa.h"
  81#include "standard-headers/asm-x86/bootparam.h"
  82#include "hw/virtio/virtio-pmem-pci.h"
  83#include "hw/mem/memory-device.h"
  84#include "sysemu/replay.h"
  85#include "qapi/qmp/qerror.h"
  86#include "config-devices.h"
  87
  88/* debug PC/ISA interrupts */
  89//#define DEBUG_IRQ
  90
  91#ifdef DEBUG_IRQ
  92#define DPRINTF(fmt, ...)                                       \
  93    do { printf("CPUIRQ: " fmt , ## __VA_ARGS__); } while (0)
  94#else
  95#define DPRINTF(fmt, ...)
  96#endif
  97
  98#define E820_NR_ENTRIES         16
  99
 100struct e820_entry {
 101    uint64_t address;
 102    uint64_t length;
 103    uint32_t type;
 104} QEMU_PACKED __attribute((__aligned__(4)));
 105
 106struct e820_table {
 107    uint32_t count;
 108    struct e820_entry entry[E820_NR_ENTRIES];
 109} QEMU_PACKED __attribute((__aligned__(4)));
 110
 111static struct e820_table e820_reserve;
 112static struct e820_entry *e820_table;
 113static unsigned e820_entries;
 114struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 115
 116/* Physical Address of PVH entry point read from kernel ELF NOTE */
 117static size_t pvh_start_addr;
 118
 119GlobalProperty pc_compat_4_0[] = {};
 120const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);
 121
 122GlobalProperty pc_compat_3_1[] = {
 123    { "intel-iommu", "dma-drain", "off" },
 124    { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" },
 125    { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" },
 126    { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" },
 127    { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" },
 128    { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" },
 129    { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" },
 130    { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" },
 131    { "EPYC" "-" TYPE_X86_CPU, "npt", "off" },
 132    { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" },
 133    { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" },
 134    { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" },
 135    { "Skylake-Client" "-" TYPE_X86_CPU,      "mpx", "on" },
 136    { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" },
 137    { "Skylake-Server" "-" TYPE_X86_CPU,      "mpx", "on" },
 138    { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" },
 139    { "Cascadelake-Server" "-" TYPE_X86_CPU,  "mpx", "on" },
 140    { "Icelake-Client" "-" TYPE_X86_CPU,      "mpx", "on" },
 141    { "Icelake-Server" "-" TYPE_X86_CPU,      "mpx", "on" },
 142    { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" },
 143    { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" },
 144};
 145const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1);
 146
 147GlobalProperty pc_compat_3_0[] = {
 148    { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" },
 149    { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" },
 150    { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" },
 151};
 152const size_t pc_compat_3_0_len = G_N_ELEMENTS(pc_compat_3_0);
 153
 154GlobalProperty pc_compat_2_12[] = {
 155    { TYPE_X86_CPU, "legacy-cache", "on" },
 156    { TYPE_X86_CPU, "topoext", "off" },
 157    { "EPYC-" TYPE_X86_CPU, "xlevel", "0x8000000a" },
 158    { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", "0x8000000a" },
 159};
 160const size_t pc_compat_2_12_len = G_N_ELEMENTS(pc_compat_2_12);
 161
 162GlobalProperty pc_compat_2_11[] = {
 163    { TYPE_X86_CPU, "x-migrate-smi-count", "off" },
 164    { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" },
 165};
 166const size_t pc_compat_2_11_len = G_N_ELEMENTS(pc_compat_2_11);
 167
 168GlobalProperty pc_compat_2_10[] = {
 169    { TYPE_X86_CPU, "x-hv-max-vps", "0x40" },
 170    { "i440FX-pcihost", "x-pci-hole64-fix", "off" },
 171    { "q35-pcihost", "x-pci-hole64-fix", "off" },
 172};
 173const size_t pc_compat_2_10_len = G_N_ELEMENTS(pc_compat_2_10);
 174
 175GlobalProperty pc_compat_2_9[] = {
 176    { "mch", "extended-tseg-mbytes", "0" },
 177};
 178const size_t pc_compat_2_9_len = G_N_ELEMENTS(pc_compat_2_9);
 179
 180GlobalProperty pc_compat_2_8[] = {
 181    { TYPE_X86_CPU, "tcg-cpuid", "off" },
 182    { "kvmclock", "x-mach-use-reliable-get-clock", "off" },
 183    { "ICH9-LPC", "x-smi-broadcast", "off" },
 184    { TYPE_X86_CPU, "vmware-cpuid-freq", "off" },
 185    { "Haswell-" TYPE_X86_CPU, "stepping", "1" },
 186};
 187const size_t pc_compat_2_8_len = G_N_ELEMENTS(pc_compat_2_8);
 188
 189GlobalProperty pc_compat_2_7[] = {
 190    { TYPE_X86_CPU, "l3-cache", "off" },
 191    { TYPE_X86_CPU, "full-cpuid-auto-level", "off" },
 192    { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" },
 193    { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" },
 194    { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" },
 195    { "isa-pcspk", "migrate", "off" },
 196};
 197const size_t pc_compat_2_7_len = G_N_ELEMENTS(pc_compat_2_7);
 198
 199GlobalProperty pc_compat_2_6[] = {
 200    { TYPE_X86_CPU, "cpuid-0xb", "off" },
 201    { "vmxnet3", "romfile", "" },
 202    { TYPE_X86_CPU, "fill-mtrr-mask", "off" },
 203    { "apic-common", "legacy-instance-id", "on", }
 204};
 205const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6);
 206
 207GlobalProperty pc_compat_2_5[] = {};
 208const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5);
 209
 210GlobalProperty pc_compat_2_4[] = {
 211    PC_CPU_MODEL_IDS("2.4.0")
 212    { "Haswell-" TYPE_X86_CPU, "abm", "off" },
 213    { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" },
 214    { "Broadwell-" TYPE_X86_CPU, "abm", "off" },
 215    { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" },
 216    { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" },
 217    { TYPE_X86_CPU, "check", "off" },
 218    { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" },
 219    { "qemu64" "-" TYPE_X86_CPU, "abm", "on" },
 220    { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" },
 221    { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" },
 222    { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" },
 223    { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" },
 224    { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" },
 225    { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", }
 226};
 227const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4);
 228
 229GlobalProperty pc_compat_2_3[] = {
 230    PC_CPU_MODEL_IDS("2.3.0")
 231    { TYPE_X86_CPU, "arat", "off" },
 232    { "qemu64" "-" TYPE_X86_CPU, "min-level", "4" },
 233    { "kvm64" "-" TYPE_X86_CPU, "min-level", "5" },
 234    { "pentium3" "-" TYPE_X86_CPU, "min-level", "2" },
 235    { "n270" "-" TYPE_X86_CPU, "min-level", "5" },
 236    { "Conroe" "-" TYPE_X86_CPU, "min-level", "4" },
 237    { "Penryn" "-" TYPE_X86_CPU, "min-level", "4" },
 238    { "Nehalem" "-" TYPE_X86_CPU, "min-level", "4" },
 239    { "n270" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 240    { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 241    { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 242    { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 243    { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 244    { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 245    { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 246    { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 247    { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 248    { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 249    { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", "0x8000000a" },
 250    { TYPE_X86_CPU, "kvm-no-smi-migration", "on" },
 251};
 252const size_t pc_compat_2_3_len = G_N_ELEMENTS(pc_compat_2_3);
 253
 254GlobalProperty pc_compat_2_2[] = {
 255    PC_CPU_MODEL_IDS("2.2.0")
 256    { "kvm64" "-" TYPE_X86_CPU, "vme", "off" },
 257    { "kvm32" "-" TYPE_X86_CPU, "vme", "off" },
 258    { "Conroe" "-" TYPE_X86_CPU, "vme", "off" },
 259    { "Penryn" "-" TYPE_X86_CPU, "vme", "off" },
 260    { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" },
 261    { "Westmere" "-" TYPE_X86_CPU, "vme", "off" },
 262    { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" },
 263    { "Haswell" "-" TYPE_X86_CPU, "vme", "off" },
 264    { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" },
 265    { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" },
 266    { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" },
 267    { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" },
 268    { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" },
 269    { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" },
 270    { "Haswell" "-" TYPE_X86_CPU, "f16c", "off" },
 271    { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" },
 272    { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" },
 273    { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" },
 274};
 275const size_t pc_compat_2_2_len = G_N_ELEMENTS(pc_compat_2_2);
 276
 277GlobalProperty pc_compat_2_1[] = {
 278    PC_CPU_MODEL_IDS("2.1.0")
 279    { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" },
 280    { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" },
 281};
 282const size_t pc_compat_2_1_len = G_N_ELEMENTS(pc_compat_2_1);
 283
 284GlobalProperty pc_compat_2_0[] = {
 285    PC_CPU_MODEL_IDS("2.0.0")
 286    { "virtio-scsi-pci", "any_layout", "off" },
 287    { "PIIX4_PM", "memory-hotplug-support", "off" },
 288    { "apic", "version", "0x11" },
 289    { "nec-usb-xhci", "superspeed-ports-first", "off" },
 290    { "nec-usb-xhci", "force-pcie-endcap", "on" },
 291    { "pci-serial", "prog_if", "0" },
 292    { "pci-serial-2x", "prog_if", "0" },
 293    { "pci-serial-4x", "prog_if", "0" },
 294    { "virtio-net-pci", "guest_announce", "off" },
 295    { "ICH9-LPC", "memory-hotplug-support", "off" },
 296    { "xio3130-downstream", COMPAT_PROP_PCP, "off" },
 297    { "ioh3420", COMPAT_PROP_PCP, "off" },
 298};
 299const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0);
 300
 301GlobalProperty pc_compat_1_7[] = {
 302    PC_CPU_MODEL_IDS("1.7.0")
 303    { TYPE_USB_DEVICE, "msos-desc", "no" },
 304    { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" },
 305    { "hpet", HPET_INTCAP, "4" },
 306};
 307const size_t pc_compat_1_7_len = G_N_ELEMENTS(pc_compat_1_7);
 308
 309GlobalProperty pc_compat_1_6[] = {
 310    PC_CPU_MODEL_IDS("1.6.0")
 311    { "e1000", "mitigation", "off" },
 312    { "qemu64-" TYPE_X86_CPU, "model", "2" },
 313    { "qemu32-" TYPE_X86_CPU, "model", "3" },
 314    { "i440FX-pcihost", "short_root_bus", "1" },
 315    { "q35-pcihost", "short_root_bus", "1" },
 316};
 317const size_t pc_compat_1_6_len = G_N_ELEMENTS(pc_compat_1_6);
 318
 319GlobalProperty pc_compat_1_5[] = {
 320    PC_CPU_MODEL_IDS("1.5.0")
 321    { "Conroe-" TYPE_X86_CPU, "model", "2" },
 322    { "Conroe-" TYPE_X86_CPU, "min-level", "2" },
 323    { "Penryn-" TYPE_X86_CPU, "model", "2" },
 324    { "Penryn-" TYPE_X86_CPU, "min-level", "2" },
 325    { "Nehalem-" TYPE_X86_CPU, "model", "2" },
 326    { "Nehalem-" TYPE_X86_CPU, "min-level", "2" },
 327    { "virtio-net-pci", "any_layout", "off" },
 328    { TYPE_X86_CPU, "pmu", "on" },
 329    { "i440FX-pcihost", "short_root_bus", "0" },
 330    { "q35-pcihost", "short_root_bus", "0" },
 331};
 332const size_t pc_compat_1_5_len = G_N_ELEMENTS(pc_compat_1_5);
 333
 334GlobalProperty pc_compat_1_4[] = {
 335    PC_CPU_MODEL_IDS("1.4.0")
 336    { "scsi-hd", "discard_granularity", "0" },
 337    { "scsi-cd", "discard_granularity", "0" },
 338    { "scsi-disk", "discard_granularity", "0" },
 339    { "ide-hd", "discard_granularity", "0" },
 340    { "ide-cd", "discard_granularity", "0" },
 341    { "ide-drive", "discard_granularity", "0" },
 342    { "virtio-blk-pci", "discard_granularity", "0" },
 343    /* DEV_NVECTORS_UNSPECIFIED as a uint32_t string: */
 344    { "virtio-serial-pci", "vectors", "0xFFFFFFFF" },
 345    { "virtio-net-pci", "ctrl_guest_offloads", "off" },
 346    { "e1000", "romfile", "pxe-e1000.rom" },
 347    { "ne2k_pci", "romfile", "pxe-ne2k_pci.rom" },
 348    { "pcnet", "romfile", "pxe-pcnet.rom" },
 349    { "rtl8139", "romfile", "pxe-rtl8139.rom" },
 350    { "virtio-net-pci", "romfile", "pxe-virtio.rom" },
 351    { "486-" TYPE_X86_CPU, "model", "0" },
 352    { "n270" "-" TYPE_X86_CPU, "movbe", "off" },
 353    { "Westmere" "-" TYPE_X86_CPU, "pclmulqdq", "off" },
 354};
 355const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4);
 356
 357void gsi_handler(void *opaque, int n, int level)
 358{
 359    GSIState *s = opaque;
 360
 361    DPRINTF("pc: %s GSI %d\n", level ? "raising" : "lowering", n);
 362    if (n < ISA_NUM_IRQS) {
 363        qemu_set_irq(s->i8259_irq[n], level);
 364    }
 365    qemu_set_irq(s->ioapic_irq[n], level);
 366}
 367
 368static void ioport80_write(void *opaque, hwaddr addr, uint64_t data,
 369                           unsigned size)
 370{
 371}
 372
 373static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size)
 374{
 375    return 0xffffffffffffffffULL;
 376}
 377
 378/* MSDOS compatibility mode FPU exception support */
 379static qemu_irq ferr_irq;
 380
 381void pc_register_ferr_irq(qemu_irq irq)
 382{
 383    ferr_irq = irq;
 384}
 385
 386/* XXX: add IGNNE support */
 387void cpu_set_ferr(CPUX86State *s)
 388{
 389    qemu_irq_raise(ferr_irq);
 390}
 391
 392static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data,
 393                           unsigned size)
 394{
 395    qemu_irq_lower(ferr_irq);
 396}
 397
 398static uint64_t ioportF0_read(void *opaque, hwaddr addr, unsigned size)
 399{
 400    return 0xffffffffffffffffULL;
 401}
 402
 403/* TSC handling */
 404uint64_t cpu_get_tsc(CPUX86State *env)
 405{
 406    return cpu_get_ticks();
 407}
 408
 409/* IRQ handling */
 410int cpu_get_pic_interrupt(CPUX86State *env)
 411{
 412    X86CPU *cpu = env_archcpu(env);
 413    int intno;
 414
 415    if (!kvm_irqchip_in_kernel()) {
 416        intno = apic_get_interrupt(cpu->apic_state);
 417        if (intno >= 0) {
 418            return intno;
 419        }
 420        /* read the irq from the PIC */
 421        if (!apic_accept_pic_intr(cpu->apic_state)) {
 422            return -1;
 423        }
 424    }
 425
 426    intno = pic_read_irq(isa_pic);
 427    return intno;
 428}
 429
 430static void pic_irq_request(void *opaque, int irq, int level)
 431{
 432    CPUState *cs = first_cpu;
 433    X86CPU *cpu = X86_CPU(cs);
 434
 435    DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
 436    if (cpu->apic_state && !kvm_irqchip_in_kernel()) {
 437        CPU_FOREACH(cs) {
 438            cpu = X86_CPU(cs);
 439            if (apic_accept_pic_intr(cpu->apic_state)) {
 440                apic_deliver_pic_intr(cpu->apic_state, level);
 441            }
 442        }
 443    } else {
 444        if (level) {
 445            cpu_interrupt(cs, CPU_INTERRUPT_HARD);
 446        } else {
 447            cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
 448        }
 449    }
 450}
 451
 452/* PC cmos mappings */
 453
 454#define REG_EQUIPMENT_BYTE          0x14
 455
 456int cmos_get_fd_drive_type(FloppyDriveType fd0)
 457{
 458    int val;
 459
 460    switch (fd0) {
 461    case FLOPPY_DRIVE_TYPE_144:
 462        /* 1.44 Mb 3"5 drive */
 463        val = 4;
 464        break;
 465    case FLOPPY_DRIVE_TYPE_288:
 466        /* 2.88 Mb 3"5 drive */
 467        val = 5;
 468        break;
 469    case FLOPPY_DRIVE_TYPE_120:
 470        /* 1.2 Mb 5"5 drive */
 471        val = 2;
 472        break;
 473    case FLOPPY_DRIVE_TYPE_NONE:
 474    default:
 475        val = 0;
 476        break;
 477    }
 478    return val;
 479}
 480
 481static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs,
 482                         int16_t cylinders, int8_t heads, int8_t sectors)
 483{
 484    rtc_set_memory(s, type_ofs, 47);
 485    rtc_set_memory(s, info_ofs, cylinders);
 486    rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
 487    rtc_set_memory(s, info_ofs + 2, heads);
 488    rtc_set_memory(s, info_ofs + 3, 0xff);
 489    rtc_set_memory(s, info_ofs + 4, 0xff);
 490    rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
 491    rtc_set_memory(s, info_ofs + 6, cylinders);
 492    rtc_set_memory(s, info_ofs + 7, cylinders >> 8);
 493    rtc_set_memory(s, info_ofs + 8, sectors);
 494}
 495
 496/* convert boot_device letter to something recognizable by the bios */
 497static int boot_device2nibble(char boot_device)
 498{
 499    switch(boot_device) {
 500    case 'a':
 501    case 'b':
 502        return 0x01; /* floppy boot */
 503    case 'c':
 504        return 0x02; /* hard drive boot */
 505    case 'd':
 506        return 0x03; /* CD-ROM boot */
 507    case 'n':
 508        return 0x04; /* Network boot */
 509    }
 510    return 0;
 511}
 512
 513static void set_boot_dev(ISADevice *s, const char *boot_device, Error **errp)
 514{
 515#define PC_MAX_BOOT_DEVICES 3
 516    int nbds, bds[3] = { 0, };
 517    int i;
 518
 519    nbds = strlen(boot_device);
 520    if (nbds > PC_MAX_BOOT_DEVICES) {
 521        error_setg(errp, "Too many boot devices for PC");
 522        return;
 523    }
 524    for (i = 0; i < nbds; i++) {
 525        bds[i] = boot_device2nibble(boot_device[i]);
 526        if (bds[i] == 0) {
 527            error_setg(errp, "Invalid boot device for PC: '%c'",
 528                       boot_device[i]);
 529            return;
 530        }
 531    }
 532    rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]);
 533    rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1));
 534}
 535
 536static void pc_boot_set(void *opaque, const char *boot_device, Error **errp)
 537{
 538    set_boot_dev(opaque, boot_device, errp);
 539}
 540
 541static void pc_cmos_init_floppy(ISADevice *rtc_state, ISADevice *floppy)
 542{
 543    int val, nb, i;
 544    FloppyDriveType fd_type[2] = { FLOPPY_DRIVE_TYPE_NONE,
 545                                   FLOPPY_DRIVE_TYPE_NONE };
 546
 547    /* floppy type */
 548    if (floppy) {
 549        for (i = 0; i < 2; i++) {
 550            fd_type[i] = isa_fdc_get_drive_type(floppy, i);
 551        }
 552    }
 553    val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
 554        cmos_get_fd_drive_type(fd_type[1]);
 555    rtc_set_memory(rtc_state, 0x10, val);
 556
 557    val = rtc_get_memory(rtc_state, REG_EQUIPMENT_BYTE);
 558    nb = 0;
 559    if (fd_type[0] != FLOPPY_DRIVE_TYPE_NONE) {
 560        nb++;
 561    }
 562    if (fd_type[1] != FLOPPY_DRIVE_TYPE_NONE) {
 563        nb++;
 564    }
 565    switch (nb) {
 566    case 0:
 567        break;
 568    case 1:
 569        val |= 0x01; /* 1 drive, ready for boot */
 570        break;
 571    case 2:
 572        val |= 0x41; /* 2 drives, ready for boot */
 573        break;
 574    }
 575    rtc_set_memory(rtc_state, REG_EQUIPMENT_BYTE, val);
 576}
 577
 578typedef struct pc_cmos_init_late_arg {
 579    ISADevice *rtc_state;
 580    BusState *idebus[2];
 581} pc_cmos_init_late_arg;
 582
 583typedef struct check_fdc_state {
 584    ISADevice *floppy;
 585    bool multiple;
 586} CheckFdcState;
 587
 588static int check_fdc(Object *obj, void *opaque)
 589{
 590    CheckFdcState *state = opaque;
 591    Object *fdc;
 592    uint32_t iobase;
 593    Error *local_err = NULL;
 594
 595    fdc = object_dynamic_cast(obj, TYPE_ISA_FDC);
 596    if (!fdc) {
 597        return 0;
 598    }
 599
 600    iobase = object_property_get_uint(obj, "iobase", &local_err);
 601    if (local_err || iobase != 0x3f0) {
 602        error_free(local_err);
 603        return 0;
 604    }
 605
 606    if (state->floppy) {
 607        state->multiple = true;
 608    } else {
 609        state->floppy = ISA_DEVICE(obj);
 610    }
 611    return 0;
 612}
 613
 614static const char * const fdc_container_path[] = {
 615    "/unattached", "/peripheral", "/peripheral-anon"
 616};
 617
 618/*
 619 * Locate the FDC at IO address 0x3f0, in order to configure the CMOS registers
 620 * and ACPI objects.
 621 */
 622ISADevice *pc_find_fdc0(void)
 623{
 624    int i;
 625    Object *container;
 626    CheckFdcState state = { 0 };
 627
 628    for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) {
 629        container = container_get(qdev_get_machine(), fdc_container_path[i]);
 630        object_child_foreach(container, check_fdc, &state);
 631    }
 632
 633    if (state.multiple) {
 634        warn_report("multiple floppy disk controllers with "
 635                    "iobase=0x3f0 have been found");
 636        error_printf("the one being picked for CMOS setup might not reflect "
 637                     "your intent");
 638    }
 639
 640    return state.floppy;
 641}
 642
 643static void pc_cmos_init_late(void *opaque)
 644{
 645    pc_cmos_init_late_arg *arg = opaque;
 646    ISADevice *s = arg->rtc_state;
 647    int16_t cylinders;
 648    int8_t heads, sectors;
 649    int val;
 650    int i, trans;
 651
 652    val = 0;
 653    if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 0,
 654                                           &cylinders, &heads, &sectors) >= 0) {
 655        cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
 656        val |= 0xf0;
 657    }
 658    if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 1,
 659                                           &cylinders, &heads, &sectors) >= 0) {
 660        cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
 661        val |= 0x0f;
 662    }
 663    rtc_set_memory(s, 0x12, val);
 664
 665    val = 0;
 666    for (i = 0; i < 4; i++) {
 667        /* NOTE: ide_get_geometry() returns the physical
 668           geometry.  It is always such that: 1 <= sects <= 63, 1
 669           <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
 670           geometry can be different if a translation is done. */
 671        if (arg->idebus[i / 2] &&
 672            ide_get_geometry(arg->idebus[i / 2], i % 2,
 673                             &cylinders, &heads, &sectors) >= 0) {
 674            trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1;
 675            assert((trans & ~3) == 0);
 676            val |= trans << (i * 2);
 677        }
 678    }
 679    rtc_set_memory(s, 0x39, val);
 680
 681    pc_cmos_init_floppy(s, pc_find_fdc0());
 682
 683    qemu_unregister_reset(pc_cmos_init_late, opaque);
 684}
 685
 686void pc_cmos_init(PCMachineState *pcms,
 687                  BusState *idebus0, BusState *idebus1,
 688                  ISADevice *s)
 689{
 690    int val;
 691    static pc_cmos_init_late_arg arg;
 692
 693    /* various important CMOS locations needed by PC/Bochs bios */
 694
 695    /* memory size */
 696    /* base memory (first MiB) */
 697    val = MIN(pcms->below_4g_mem_size / KiB, 640);
 698    rtc_set_memory(s, 0x15, val);
 699    rtc_set_memory(s, 0x16, val >> 8);
 700    /* extended memory (next 64MiB) */
 701    if (pcms->below_4g_mem_size > 1 * MiB) {
 702        val = (pcms->below_4g_mem_size - 1 * MiB) / KiB;
 703    } else {
 704        val = 0;
 705    }
 706    if (val > 65535)
 707        val = 65535;
 708    rtc_set_memory(s, 0x17, val);
 709    rtc_set_memory(s, 0x18, val >> 8);
 710    rtc_set_memory(s, 0x30, val);
 711    rtc_set_memory(s, 0x31, val >> 8);
 712    /* memory between 16MiB and 4GiB */
 713    if (pcms->below_4g_mem_size > 16 * MiB) {
 714        val = (pcms->below_4g_mem_size - 16 * MiB) / (64 * KiB);
 715    } else {
 716        val = 0;
 717    }
 718    if (val > 65535)
 719        val = 65535;
 720    rtc_set_memory(s, 0x34, val);
 721    rtc_set_memory(s, 0x35, val >> 8);
 722    /* memory above 4GiB */
 723    val = pcms->above_4g_mem_size / 65536;
 724    rtc_set_memory(s, 0x5b, val);
 725    rtc_set_memory(s, 0x5c, val >> 8);
 726    rtc_set_memory(s, 0x5d, val >> 16);
 727
 728    object_property_add_link(OBJECT(pcms), "rtc_state",
 729                             TYPE_ISA_DEVICE,
 730                             (Object **)&pcms->rtc,
 731                             object_property_allow_set_link,
 732                             OBJ_PROP_LINK_STRONG, &error_abort);
 733    object_property_set_link(OBJECT(pcms), OBJECT(s),
 734                             "rtc_state", &error_abort);
 735
 736    set_boot_dev(s, MACHINE(pcms)->boot_order, &error_fatal);
 737
 738    val = 0;
 739    val |= 0x02; /* FPU is there */
 740    val |= 0x04; /* PS/2 mouse installed */
 741    rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);
 742
 743    /* hard drives and FDC */
 744    arg.rtc_state = s;
 745    arg.idebus[0] = idebus0;
 746    arg.idebus[1] = idebus1;
 747    qemu_register_reset(pc_cmos_init_late, &arg);
 748}
 749
 750#define TYPE_PORT92 "port92"
 751#define PORT92(obj) OBJECT_CHECK(Port92State, (obj), TYPE_PORT92)
 752
 753/* port 92 stuff: could be split off */
 754typedef struct Port92State {
 755    ISADevice parent_obj;
 756
 757    MemoryRegion io;
 758    uint8_t outport;
 759    qemu_irq a20_out;
 760} Port92State;
 761
 762static void port92_write(void *opaque, hwaddr addr, uint64_t val,
 763                         unsigned size)
 764{
 765    Port92State *s = opaque;
 766    int oldval = s->outport;
 767
 768    DPRINTF("port92: write 0x%02" PRIx64 "\n", val);
 769    s->outport = val;
 770    qemu_set_irq(s->a20_out, (val >> 1) & 1);
 771    if ((val & 1) && !(oldval & 1)) {
 772        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 773    }
 774}
 775
 776static uint64_t port92_read(void *opaque, hwaddr addr,
 777                            unsigned size)
 778{
 779    Port92State *s = opaque;
 780    uint32_t ret;
 781
 782    ret = s->outport;
 783    DPRINTF("port92: read 0x%02x\n", ret);
 784    return ret;
 785}
 786
 787static void port92_init(ISADevice *dev, qemu_irq a20_out)
 788{
 789    qdev_connect_gpio_out_named(DEVICE(dev), PORT92_A20_LINE, 0, a20_out);
 790}
 791
 792static const VMStateDescription vmstate_port92_isa = {
 793    .name = "port92",
 794    .version_id = 1,
 795    .minimum_version_id = 1,
 796    .fields = (VMStateField[]) {
 797        VMSTATE_UINT8(outport, Port92State),
 798        VMSTATE_END_OF_LIST()
 799    }
 800};
 801
 802static void port92_reset(DeviceState *d)
 803{
 804    Port92State *s = PORT92(d);
 805
 806    s->outport &= ~1;
 807}
 808
 809static const MemoryRegionOps port92_ops = {
 810    .read = port92_read,
 811    .write = port92_write,
 812    .impl = {
 813        .min_access_size = 1,
 814        .max_access_size = 1,
 815    },
 816    .endianness = DEVICE_LITTLE_ENDIAN,
 817};
 818
 819static void port92_initfn(Object *obj)
 820{
 821    Port92State *s = PORT92(obj);
 822
 823    memory_region_init_io(&s->io, OBJECT(s), &port92_ops, s, "port92", 1);
 824
 825    s->outport = 0;
 826
 827    qdev_init_gpio_out_named(DEVICE(obj), &s->a20_out, PORT92_A20_LINE, 1);
 828}
 829
 830static void port92_realizefn(DeviceState *dev, Error **errp)
 831{
 832    ISADevice *isadev = ISA_DEVICE(dev);
 833    Port92State *s = PORT92(dev);
 834
 835    isa_register_ioport(isadev, &s->io, 0x92);
 836}
 837
 838static void port92_class_initfn(ObjectClass *klass, void *data)
 839{
 840    DeviceClass *dc = DEVICE_CLASS(klass);
 841
 842    dc->realize = port92_realizefn;
 843    dc->reset = port92_reset;
 844    dc->vmsd = &vmstate_port92_isa;
 845    /*
 846     * Reason: unlike ordinary ISA devices, this one needs additional
 847     * wiring: its A20 output line needs to be wired up by
 848     * port92_init().
 849     */
 850    dc->user_creatable = false;
 851}
 852
 853static const TypeInfo port92_info = {
 854    .name          = TYPE_PORT92,
 855    .parent        = TYPE_ISA_DEVICE,
 856    .instance_size = sizeof(Port92State),
 857    .instance_init = port92_initfn,
 858    .class_init    = port92_class_initfn,
 859};
 860
 861static void port92_register_types(void)
 862{
 863    type_register_static(&port92_info);
 864}
 865
 866type_init(port92_register_types)
 867
 868static void handle_a20_line_change(void *opaque, int irq, int level)
 869{
 870    X86CPU *cpu = opaque;
 871
 872    /* XXX: send to all CPUs ? */
 873    /* XXX: add logic to handle multiple A20 line sources */
 874    x86_cpu_set_a20(cpu, level);
 875}
 876
 877int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
 878{
 879    int index = le32_to_cpu(e820_reserve.count);
 880    struct e820_entry *entry;
 881
 882    if (type != E820_RAM) {
 883        /* old FW_CFG_E820_TABLE entry -- reservations only */
 884        if (index >= E820_NR_ENTRIES) {
 885            return -EBUSY;
 886        }
 887        entry = &e820_reserve.entry[index++];
 888
 889        entry->address = cpu_to_le64(address);
 890        entry->length = cpu_to_le64(length);
 891        entry->type = cpu_to_le32(type);
 892
 893        e820_reserve.count = cpu_to_le32(index);
 894    }
 895
 896    /* new "etc/e820" file -- include ram too */
 897    e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
 898    e820_table[e820_entries].address = cpu_to_le64(address);
 899    e820_table[e820_entries].length = cpu_to_le64(length);
 900    e820_table[e820_entries].type = cpu_to_le32(type);
 901    e820_entries++;
 902
 903    return e820_entries;
 904}
 905
 906int e820_get_num_entries(void)
 907{
 908    return e820_entries;
 909}
 910
 911bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length)
 912{
 913    if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) {
 914        *address = le64_to_cpu(e820_table[idx].address);
 915        *length = le64_to_cpu(e820_table[idx].length);
 916        return true;
 917    }
 918    return false;
 919}
 920
 921/* Calculates initial APIC ID for a specific CPU index
 922 *
 923 * Currently we need to be able to calculate the APIC ID from the CPU index
 924 * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have
 925 * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of
 926 * all CPUs up to max_cpus.
 927 */
 928static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms,
 929                                           unsigned int cpu_index)
 930{
 931    MachineState *ms = MACHINE(pcms);
 932    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 933    uint32_t correct_id;
 934    static bool warned;
 935
 936    correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores,
 937                                         ms->smp.threads, cpu_index);
 938    if (pcmc->compat_apic_id_mode) {
 939        if (cpu_index != correct_id && !warned && !qtest_enabled()) {
 940            error_report("APIC IDs set in compatibility mode, "
 941                         "CPU topology won't match the configuration");
 942            warned = true;
 943        }
 944        return cpu_index;
 945    } else {
 946        return correct_id;
 947    }
 948}
 949
 950static void pc_build_smbios(PCMachineState *pcms)
 951{
 952    uint8_t *smbios_tables, *smbios_anchor;
 953    size_t smbios_tables_len, smbios_anchor_len;
 954    struct smbios_phys_mem_area *mem_array;
 955    unsigned i, array_count;
 956    MachineState *ms = MACHINE(pcms);
 957    X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);
 958
 959    /* tell smbios about cpuid version and features */
 960    smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
 961
 962    smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len);
 963    if (smbios_tables) {
 964        fw_cfg_add_bytes(pcms->fw_cfg, FW_CFG_SMBIOS_ENTRIES,
 965                         smbios_tables, smbios_tables_len);
 966    }
 967
 968    /* build the array of physical mem area from e820 table */
 969    mem_array = g_malloc0(sizeof(*mem_array) * e820_get_num_entries());
 970    for (i = 0, array_count = 0; i < e820_get_num_entries(); i++) {
 971        uint64_t addr, len;
 972
 973        if (e820_get_entry(i, E820_RAM, &addr, &len)) {
 974            mem_array[array_count].address = addr;
 975            mem_array[array_count].length = len;
 976            array_count++;
 977        }
 978    }
 979    smbios_get_tables(ms, mem_array, array_count,
 980                      &smbios_tables, &smbios_tables_len,
 981                      &smbios_anchor, &smbios_anchor_len);
 982    g_free(mem_array);
 983
 984    if (smbios_anchor) {
 985        fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-tables",
 986                        smbios_tables, smbios_tables_len);
 987        fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-anchor",
 988                        smbios_anchor, smbios_anchor_len);
 989    }
 990}
 991
 992static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
 993{
 994    FWCfgState *fw_cfg;
 995    uint64_t *numa_fw_cfg;
 996    int i;
 997    const CPUArchIdList *cpus;
 998    MachineClass *mc = MACHINE_GET_CLASS(pcms);
 999
1000    fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as);
1001    fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
1002
1003    /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
1004     *
1005     * For machine types prior to 1.8, SeaBIOS needs FW_CFG_MAX_CPUS for
1006     * building MPTable, ACPI MADT, ACPI CPU hotplug and ACPI SRAT table,
1007     * that tables are based on xAPIC ID and QEMU<->SeaBIOS interface
1008     * for CPU hotplug also uses APIC ID and not "CPU index".
1009     * This means that FW_CFG_MAX_CPUS is not the "maximum number of CPUs",
1010     * but the "limit to the APIC ID values SeaBIOS may see".
1011     *
1012     * So for compatibility reasons with old BIOSes we are stuck with
1013     * "etc/max-cpus" actually being apic_id_limit
1014     */
1015    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit);
1016    fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
1017    fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
1018                     acpi_tables, acpi_tables_len);
1019    fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override());
1020
1021    fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
1022                     &e820_reserve, sizeof(e820_reserve));
1023    fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
1024                    sizeof(struct e820_entry) * e820_entries);
1025
1026    fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
1027    /* allocate memory for the NUMA channel: one (64bit) word for the number
1028     * of nodes, one word for each VCPU->node and one word for each node to
1029     * hold the amount of memory.
1030     */
1031    numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes);
1032    numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
1033    cpus = mc->possible_cpu_arch_ids(MACHINE(pcms));
1034    for (i = 0; i < cpus->len; i++) {
1035        unsigned int apic_id = cpus->cpus[i].arch_id;
1036        assert(apic_id < pcms->apic_id_limit);
1037        numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id);
1038    }
1039    for (i = 0; i < nb_numa_nodes; i++) {
1040        numa_fw_cfg[pcms->apic_id_limit + 1 + i] =
1041            cpu_to_le64(numa_info[i].node_mem);
1042    }
1043    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
1044                     (1 + pcms->apic_id_limit + nb_numa_nodes) *
1045                     sizeof(*numa_fw_cfg));
1046
1047    return fw_cfg;
1048}
1049
/*
 * Returns the size of the open stream @f in bytes, leaving the stream
 * position where it was on entry.  Returns -1 if the position cannot be
 * queried, moved to the end, or restored.
 */
static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    if (where < 0 || fseek(f, 0, SEEK_END) != 0) {
        return -1;
    }

    size = ftell(f);

    /* always try to put the caller's position back */
    if (fseek(f, where, SEEK_SET) != 0) {
        return -1;
    }

    return size;
}
1063
/*
 * Header of one setup_data node appended to the kernel image; the payload of
 * @len bytes follows the header directly.  The struct is packed, so the
 * header is exactly 16 bytes.
 */
struct setup_data {
    uint64_t next;   /* address of the next node, 0 terminates the list */
    uint32_t type;   /* payload kind, e.g. SETUP_DTB */
    uint32_t len;    /* payload length in bytes */
    uint8_t data[];  /* payload (C99 flexible array member) */
} __attribute__((packed));
1070
1071
1072/*
1073 * The entry point into the kernel for PVH boot is different from
1074 * the native entry point.  The PVH entry is defined by the x86/HVM
1075 * direct boot ABI and is available in an ELFNOTE in the kernel binary.
1076 *
1077 * This function is passed to load_elf() when it is called from
1078 * load_elfboot() which then additionally checks for an ELF Note of
1079 * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to
1080 * parse the PVH entry address from the ELF Note.
1081 *
1082 * Due to trickery in elf_opts.h, load_elf() is actually available as
1083 * load_elf32() or load_elf64() and this routine needs to be able
1084 * to deal with being called as 32 or 64 bit.
1085 *
1086 * The address of the PVH entry point is saved to the 'pvh_start_addr'
1087 * global variable.  (although the entry point is 32-bit, the kernel
1088 * binary can be either 32-bit or 64-bit).
1089 */
1090static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64)
1091{
1092    size_t *elf_note_data_addr;
1093
1094    /* Check if ELF Note header passed in is valid */
1095    if (arg1 == NULL) {
1096        return 0;
1097    }
1098
1099    if (is64) {
1100        struct elf64_note *nhdr64 = (struct elf64_note *)arg1;
1101        uint64_t nhdr_size64 = sizeof(struct elf64_note);
1102        uint64_t phdr_align = *(uint64_t *)arg2;
1103        uint64_t nhdr_namesz = nhdr64->n_namesz;
1104
1105        elf_note_data_addr =
1106            ((void *)nhdr64) + nhdr_size64 +
1107            QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
1108    } else {
1109        struct elf32_note *nhdr32 = (struct elf32_note *)arg1;
1110        uint32_t nhdr_size32 = sizeof(struct elf32_note);
1111        uint32_t phdr_align = *(uint32_t *)arg2;
1112        uint32_t nhdr_namesz = nhdr32->n_namesz;
1113
1114        elf_note_data_addr =
1115            ((void *)nhdr32) + nhdr_size32 +
1116            QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
1117    }
1118
1119    pvh_start_addr = *elf_note_data_addr;
1120
1121    return pvh_start_addr;
1122}
1123
1124static bool load_elfboot(const char *kernel_filename,
1125                   int kernel_file_size,
1126                   uint8_t *header,
1127                   size_t pvh_xen_start_addr,
1128                   FWCfgState *fw_cfg)
1129{
1130    uint32_t flags = 0;
1131    uint32_t mh_load_addr = 0;
1132    uint32_t elf_kernel_size = 0;
1133    uint64_t elf_entry;
1134    uint64_t elf_low, elf_high;
1135    int kernel_size;
1136
1137    if (ldl_p(header) != 0x464c457f) {
1138        return false; /* no elfboot */
1139    }
1140
1141    bool elf_is64 = header[EI_CLASS] == ELFCLASS64;
1142    flags = elf_is64 ?
1143        ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags;
1144
1145    if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */
1146        error_report("elfboot unsupported flags = %x", flags);
1147        exit(1);
1148    }
1149
1150    uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY;
1151    kernel_size = load_elf(kernel_filename, read_pvh_start_addr,
1152                           NULL, &elf_note_type, &elf_entry,
1153                           &elf_low, &elf_high, 0, I386_ELF_MACHINE,
1154                           0, 0);
1155
1156    if (kernel_size < 0) {
1157        error_report("Error while loading elf kernel");
1158        exit(1);
1159    }
1160    mh_load_addr = elf_low;
1161    elf_kernel_size = elf_high - elf_low;
1162
1163    if (pvh_start_addr == 0) {
1164        error_report("Error loading uncompressed kernel without PVH ELF Note");
1165        exit(1);
1166    }
1167    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr);
1168    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr);
1169    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size);
1170
1171    return true;
1172}
1173
1174static void load_linux(PCMachineState *pcms,
1175                       FWCfgState *fw_cfg)
1176{
1177    uint16_t protocol;
1178    int setup_size, kernel_size, cmdline_size;
1179    int dtb_size, setup_data_offset;
1180    uint32_t initrd_max;
1181    uint8_t header[8192], *setup, *kernel;
1182    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
1183    FILE *f;
1184    char *vmode;
1185    MachineState *machine = MACHINE(pcms);
1186    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
1187    struct setup_data *setup_data;
1188    const char *kernel_filename = machine->kernel_filename;
1189    const char *initrd_filename = machine->initrd_filename;
1190    const char *dtb_filename = machine->dtb;
1191    const char *kernel_cmdline = machine->kernel_cmdline;
1192
1193    /* Align to 16 bytes as a paranoia measure */
1194    cmdline_size = (strlen(kernel_cmdline)+16) & ~15;
1195
1196    /* load the kernel header */
1197    f = fopen(kernel_filename, "rb");
1198    if (!f || !(kernel_size = get_file_size(f)) ||
1199        fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
1200        MIN(ARRAY_SIZE(header), kernel_size)) {
1201        fprintf(stderr, "qemu: could not load kernel '%s': %s\n",
1202                kernel_filename, strerror(errno));
1203        exit(1);
1204    }
1205
1206    /* kernel protocol version */
1207#if 0
1208    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
1209#endif
1210    if (ldl_p(header+0x202) == 0x53726448) {
1211        protocol = lduw_p(header+0x206);
1212    } else {
1213        /*
1214         * This could be a multiboot kernel. If it is, let's stop treating it
1215         * like a Linux kernel.
1216         * Note: some multiboot images could be in the ELF format (the same of
1217         * PVH), so we try multiboot first since we check the multiboot magic
1218         * header before to load it.
1219         */
1220        if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
1221                           kernel_cmdline, kernel_size, header)) {
1222            return;
1223        }
1224        /*
1225         * Check if the file is an uncompressed kernel file (ELF) and load it,
1226         * saving the PVH entry point used by the x86/HVM direct boot ABI.
1227         * If load_elfboot() is successful, populate the fw_cfg info.
1228         */
1229        if (pcmc->pvh_enabled &&
1230            load_elfboot(kernel_filename, kernel_size,
1231                         header, pvh_start_addr, fw_cfg)) {
1232            fclose(f);
1233
1234            fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
1235                strlen(kernel_cmdline) + 1);
1236            fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
1237
1238            fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header));
1239            fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA,
1240                             header, sizeof(header));
1241
1242            /* load initrd */
1243            if (initrd_filename) {
1244                gsize initrd_size;
1245                gchar *initrd_data;
1246                GError *gerr = NULL;
1247
1248                if (!g_file_get_contents(initrd_filename, &initrd_data,
1249                            &initrd_size, &gerr)) {
1250                    fprintf(stderr, "qemu: error reading initrd %s: %s\n",
1251                            initrd_filename, gerr->message);
1252                    exit(1);
1253                }
1254
1255                initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1;
1256                if (initrd_size >= initrd_max) {
1257                    fprintf(stderr, "qemu: initrd is too large, cannot support."
1258                            "(max: %"PRIu32", need %"PRId64")\n",
1259                            initrd_max, (uint64_t)initrd_size);
1260                    exit(1);
1261                }
1262
1263                initrd_addr = (initrd_max - initrd_size) & ~4095;
1264
1265                fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
1266                fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
1267                fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data,
1268                                 initrd_size);
1269            }
1270
1271            option_rom[nb_option_roms].bootindex = 0;
1272            option_rom[nb_option_roms].name = "pvh.bin";
1273            nb_option_roms++;
1274
1275            return;
1276        }
1277        protocol = 0;
1278    }
1279
1280    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
1281        /* Low kernel */
1282        real_addr    = 0x90000;
1283        cmdline_addr = 0x9a000 - cmdline_size;
1284        prot_addr    = 0x10000;
1285    } else if (protocol < 0x202) {
1286        /* High but ancient kernel */
1287        real_addr    = 0x90000;
1288        cmdline_addr = 0x9a000 - cmdline_size;
1289        prot_addr    = 0x100000;
1290    } else {
1291        /* High and recent kernel */
1292        real_addr    = 0x10000;
1293        cmdline_addr = 0x20000;
1294        prot_addr    = 0x100000;
1295    }
1296
1297#if 0
1298    fprintf(stderr,
1299            "qemu: real_addr     = 0x" TARGET_FMT_plx "\n"
1300            "qemu: cmdline_addr  = 0x" TARGET_FMT_plx "\n"
1301            "qemu: prot_addr     = 0x" TARGET_FMT_plx "\n",
1302            real_addr,
1303            cmdline_addr,
1304            prot_addr);
1305#endif
1306
1307    /* highest address for loading the initrd */
1308    if (protocol >= 0x20c &&
1309        lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) {
1310        /*
1311         * Linux has supported initrd up to 4 GB for a very long time (2007,
1312         * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013),
1313         * though it only sets initrd_max to 2 GB to "work around bootloader
1314         * bugs". Luckily, QEMU firmware(which does something like bootloader)
1315         * has supported this.
1316         *
1317         * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can
1318         * be loaded into any address.
1319         *
1320         * In addition, initrd_max is uint32_t simply because QEMU doesn't
1321         * support the 64-bit boot protocol (specifically the ext_ramdisk_image
1322         * field).
1323         *
1324         * Therefore here just limit initrd_max to UINT32_MAX simply as well.
1325         */
1326        initrd_max = UINT32_MAX;
1327    } else if (protocol >= 0x203) {
1328        initrd_max = ldl_p(header+0x22c);
1329    } else {
1330        initrd_max = 0x37ffffff;
1331    }
1332
1333    if (initrd_max >= pcms->below_4g_mem_size - pcmc->acpi_data_size) {
1334        initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1;
1335    }
1336
1337    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
1338    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1);
1339    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
1340
1341    if (protocol >= 0x202) {
1342        stl_p(header+0x228, cmdline_addr);
1343    } else {
1344        stw_p(header+0x20, 0xA33F);
1345        stw_p(header+0x22, cmdline_addr-real_addr);
1346    }
1347
1348    /* handle vga= parameter */
1349    vmode = strstr(kernel_cmdline, "vga=");
1350    if (vmode) {
1351        unsigned int video_mode;
1352        /* skip "vga=" */
1353        vmode += 4;
1354        if (!strncmp(vmode, "normal", 6)) {
1355            video_mode = 0xffff;
1356        } else if (!strncmp(vmode, "ext", 3)) {
1357            video_mode = 0xfffe;
1358        } else if (!strncmp(vmode, "ask", 3)) {
1359            video_mode = 0xfffd;
1360        } else {
1361            video_mode = strtol(vmode, NULL, 0);
1362        }
1363        stw_p(header+0x1fa, video_mode);
1364    }
1365
1366    /* loader type */
1367    /* High nybble = B reserved for QEMU; low nybble is revision number.
1368       If this code is substantially changed, you may want to consider
1369       incrementing the revision. */
1370    if (protocol >= 0x200) {
1371        header[0x210] = 0xB0;
1372    }
1373    /* heap */
1374    if (protocol >= 0x201) {
1375        header[0x211] |= 0x80;  /* CAN_USE_HEAP */
1376        stw_p(header+0x224, cmdline_addr-real_addr-0x200);
1377    }
1378
1379    /* load initrd */
1380    if (initrd_filename) {
1381        gsize initrd_size;
1382        gchar *initrd_data;
1383        GError *gerr = NULL;
1384
1385        if (protocol < 0x200) {
1386            fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
1387            exit(1);
1388        }
1389
1390        if (!g_file_get_contents(initrd_filename, &initrd_data,
1391                                 &initrd_size, &gerr)) {
1392            fprintf(stderr, "qemu: error reading initrd %s: %s\n",
1393                    initrd_filename, gerr->message);
1394            exit(1);
1395        }
1396        if (initrd_size >= initrd_max) {
1397            fprintf(stderr, "qemu: initrd is too large, cannot support."
1398                    "(max: %"PRIu32", need %"PRId64")\n",
1399                    initrd_max, (uint64_t)initrd_size);
1400            exit(1);
1401        }
1402
1403        initrd_addr = (initrd_max-initrd_size) & ~4095;
1404
1405        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
1406        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
1407        fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
1408
1409        stl_p(header+0x218, initrd_addr);
1410        stl_p(header+0x21c, initrd_size);
1411    }
1412
1413    /* load kernel and setup */
1414    setup_size = header[0x1f1];
1415    if (setup_size == 0) {
1416        setup_size = 4;
1417    }
1418    setup_size = (setup_size+1)*512;
1419    if (setup_size > kernel_size) {
1420        fprintf(stderr, "qemu: invalid kernel header\n");
1421        exit(1);
1422    }
1423    kernel_size -= setup_size;
1424
1425    setup  = g_malloc(setup_size);
1426    kernel = g_malloc(kernel_size);
1427    fseek(f, 0, SEEK_SET);
1428    if (fread(setup, 1, setup_size, f) != setup_size) {
1429        fprintf(stderr, "fread() failed\n");
1430        exit(1);
1431    }
1432    if (fread(kernel, 1, kernel_size, f) != kernel_size) {
1433        fprintf(stderr, "fread() failed\n");
1434        exit(1);
1435    }
1436    fclose(f);
1437
1438    /* append dtb to kernel */
1439    if (dtb_filename) {
1440        if (protocol < 0x209) {
1441            fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
1442            exit(1);
1443        }
1444
1445        dtb_size = get_image_size(dtb_filename);
1446        if (dtb_size <= 0) {
1447            fprintf(stderr, "qemu: error reading dtb %s: %s\n",
1448                    dtb_filename, strerror(errno));
1449            exit(1);
1450        }
1451
1452        setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
1453        kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
1454        kernel = g_realloc(kernel, kernel_size);
1455
1456        stq_p(header+0x250, prot_addr + setup_data_offset);
1457
1458        setup_data = (struct setup_data *)(kernel + setup_data_offset);
1459        setup_data->next = 0;
1460        setup_data->type = cpu_to_le32(SETUP_DTB);
1461        setup_data->len = cpu_to_le32(dtb_size);
1462
1463        load_image_size(dtb_filename, setup_data->data, dtb_size);
1464    }
1465
1466    memcpy(setup, header, MIN(sizeof(header), setup_size));
1467
1468    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
1469    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
1470    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
1471
1472    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
1473    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
1474    fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
1475
1476    option_rom[nb_option_roms].bootindex = 0;
1477    option_rom[nb_option_roms].name = "linuxboot.bin";
1478    if (pcmc->linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) {
1479        option_rom[nb_option_roms].name = "linuxboot_dma.bin";
1480    }
1481    nb_option_roms++;
1482}
1483
1484#define NE2000_NB_MAX 6
1485
1486static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360,
1487                                              0x280, 0x380 };
1488static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
1489
1490void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
1491{
1492    static int nb_ne2k = 0;
1493
1494    if (nb_ne2k == NE2000_NB_MAX)
1495        return;
1496    isa_ne2000_init(bus, ne2000_io[nb_ne2k],
1497                    ne2000_irq[nb_ne2k], nd);
1498    nb_ne2k++;
1499}
1500
1501DeviceState *cpu_get_current_apic(void)
1502{
1503    if (current_cpu) {
1504        X86CPU *cpu = X86_CPU(current_cpu);
1505        return cpu->apic_state;
1506    } else {
1507        return NULL;
1508    }
1509}
1510
1511void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
1512{
1513    X86CPU *cpu = opaque;
1514
1515    if (level) {
1516        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
1517    }
1518}
1519
1520static void pc_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp)
1521{
1522    Object *cpu = NULL;
1523    Error *local_err = NULL;
1524    CPUX86State *env = NULL;
1525
1526    cpu = object_new(MACHINE(pcms)->cpu_type);
1527
1528    env = &X86_CPU(cpu)->env;
1529    env->nr_dies = pcms->smp_dies;
1530
1531    object_property_set_uint(cpu, apic_id, "apic-id", &local_err);
1532    object_property_set_bool(cpu, true, "realized", &local_err);
1533
1534    object_unref(cpu);
1535    error_propagate(errp, local_err);
1536}
1537
1538/*
1539 * This function is very similar to smp_parse()
1540 * in hw/core/machine.c but includes CPU die support.
1541 */
1542void pc_smp_parse(MachineState *ms, QemuOpts *opts)
1543{
1544    PCMachineState *pcms = PC_MACHINE(ms);
1545
1546    if (opts) {
1547        unsigned cpus    = qemu_opt_get_number(opts, "cpus", 0);
1548        unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
1549        unsigned dies = qemu_opt_get_number(opts, "dies", 1);
1550        unsigned cores   = qemu_opt_get_number(opts, "cores", 0);
1551        unsigned threads = qemu_opt_get_number(opts, "threads", 0);
1552
1553        /* compute missing values, prefer sockets over cores over threads */
1554        if (cpus == 0 || sockets == 0) {
1555            cores = cores > 0 ? cores : 1;
1556            threads = threads > 0 ? threads : 1;
1557            if (cpus == 0) {
1558                sockets = sockets > 0 ? sockets : 1;
1559                cpus = cores * threads * dies * sockets;
1560            } else {
1561                ms->smp.max_cpus =
1562                        qemu_opt_get_number(opts, "maxcpus", cpus);
1563                sockets = ms->smp.max_cpus / (cores * threads * dies);
1564            }
1565        } else if (cores == 0) {
1566            threads = threads > 0 ? threads : 1;
1567            cores = cpus / (sockets * dies * threads);
1568            cores = cores > 0 ? cores : 1;
1569        } else if (threads == 0) {
1570            threads = cpus / (cores * dies * sockets);
1571            threads = threads > 0 ? threads : 1;
1572        } else if (sockets * dies * cores * threads < cpus) {
1573            error_report("cpu topology: "
1574                         "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < "
1575                         "smp_cpus (%u)",
1576                         sockets, dies, cores, threads, cpus);
1577            exit(1);
1578        }
1579
1580        ms->smp.max_cpus =
1581                qemu_opt_get_number(opts, "maxcpus", cpus);
1582
1583        if (ms->smp.max_cpus < cpus) {
1584            error_report("maxcpus must be equal to or greater than smp");
1585            exit(1);
1586        }
1587
1588        if (sockets * dies * cores * threads > ms->smp.max_cpus) {
1589            error_report("cpu topology: "
1590                         "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > "
1591                         "maxcpus (%u)",
1592                         sockets, dies, cores, threads,
1593                         ms->smp.max_cpus);
1594            exit(1);
1595        }
1596
1597        if (sockets * dies * cores * threads != ms->smp.max_cpus) {
1598            warn_report("Invalid CPU topology deprecated: "
1599                        "sockets (%u) * dies (%u) * cores (%u) * threads (%u) "
1600                        "!= maxcpus (%u)",
1601                        sockets, dies, cores, threads,
1602                        ms->smp.max_cpus);
1603        }
1604
1605        ms->smp.cpus = cpus;
1606        ms->smp.cores = cores;
1607        ms->smp.threads = threads;
1608        pcms->smp_dies = dies;
1609    }
1610
1611    if (ms->smp.cpus > 1) {
1612        Error *blocker = NULL;
1613        error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp");
1614        replay_add_blocker(blocker);
1615    }
1616}
1617
1618void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp)
1619{
1620    PCMachineState *pcms = PC_MACHINE(ms);
1621    int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id);
1622    Error *local_err = NULL;
1623
1624    if (id < 0) {
1625        error_setg(errp, "Invalid CPU id: %" PRIi64, id);
1626        return;
1627    }
1628
1629    if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) {
1630        error_setg(errp, "Unable to add CPU: %" PRIi64
1631                   ", resulting APIC ID (%" PRIi64 ") is too large",
1632                   id, apic_id);
1633        return;
1634    }
1635
1636    pc_new_cpu(PC_MACHINE(ms), apic_id, &local_err);
1637    if (local_err) {
1638        error_propagate(errp, local_err);
1639        return;
1640    }
1641}
1642
1643void pc_cpus_init(PCMachineState *pcms)
1644{
1645    int i;
1646    const CPUArchIdList *possible_cpus;
1647    MachineState *ms = MACHINE(pcms);
1648    MachineClass *mc = MACHINE_GET_CLASS(pcms);
1649    PCMachineClass *pcmc = PC_MACHINE_CLASS(mc);
1650
1651    x86_cpu_set_default_version(pcmc->default_cpu_version);
1652
1653    /* Calculates the limit to CPU APIC ID values
1654     *
1655     * Limit for the APIC ID value, so that all
1656     * CPU APIC IDs are < pcms->apic_id_limit.
1657     *
1658     * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
1659     */
1660    pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms,
1661                                                     ms->smp.max_cpus - 1) + 1;
1662    possible_cpus = mc->possible_cpu_arch_ids(ms);
1663    for (i = 0; i < ms->smp.cpus; i++) {
1664        pc_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal);
1665    }
1666}
1667
1668static void pc_build_feature_control_file(PCMachineState *pcms)
1669{
1670    MachineState *ms = MACHINE(pcms);
1671    X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);
1672    CPUX86State *env = &cpu->env;
1673    uint32_t unused, ecx, edx;
1674    uint64_t feature_control_bits = 0;
1675    uint64_t *val;
1676
1677    cpu_x86_cpuid(env, 1, 0, &unused, &unused, &ecx, &edx);
1678    if (ecx & CPUID_EXT_VMX) {
1679        feature_control_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1680    }
1681
1682    if ((edx & (CPUID_EXT2_MCE | CPUID_EXT2_MCA)) ==
1683        (CPUID_EXT2_MCE | CPUID_EXT2_MCA) &&
1684        (env->mcg_cap & MCG_LMCE_P)) {
1685        feature_control_bits |= FEATURE_CONTROL_LMCE;
1686    }
1687
1688    if (!feature_control_bits) {
1689        return;
1690    }
1691
1692    val = g_malloc(sizeof(*val));
1693    *val = cpu_to_le64(feature_control_bits | FEATURE_CONTROL_LOCKED);
1694    fw_cfg_add_file(pcms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val));
1695}
1696
1697static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count)
1698{
1699    if (cpus_count > 0xff) {
1700        /* If the number of CPUs can't be represented in 8 bits, the
1701         * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just
1702         * to make old BIOSes fail more predictably.
1703         */
1704        rtc_set_memory(rtc, 0x5f, 0);
1705    } else {
1706        rtc_set_memory(rtc, 0x5f, cpus_count - 1);
1707    }
1708}
1709
1710static
1711void pc_machine_done(Notifier *notifier, void *data)
1712{
1713    PCMachineState *pcms = container_of(notifier,
1714                                        PCMachineState, machine_done);
1715    PCIBus *bus = pcms->bus;
1716
1717    /* set the number of CPUs */
1718    rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus);
1719
1720    if (bus) {
1721        int extra_hosts = 0;
1722
1723        QLIST_FOREACH(bus, &bus->child, sibling) {
1724            /* look for expander root buses */
1725            if (pci_bus_is_root(bus)) {
1726                extra_hosts++;
1727            }
1728        }
1729        if (extra_hosts && pcms->fw_cfg) {
1730            uint64_t *val = g_malloc(sizeof(*val));
1731            *val = cpu_to_le64(extra_hosts);
1732            fw_cfg_add_file(pcms->fw_cfg,
1733                    "etc/extra-pci-roots", val, sizeof(*val));
1734        }
1735    }
1736
1737    acpi_setup();
1738    if (pcms->fw_cfg) {
1739        pc_build_smbios(pcms);
1740        pc_build_feature_control_file(pcms);
1741        /* update FW_CFG_NB_CPUS to account for -device added CPUs */
1742        fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
1743    }
1744
1745    if (pcms->apic_id_limit > 255 && !xen_enabled()) {
1746        IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
1747
1748        if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) ||
1749            iommu->intr_eim != ON_OFF_AUTO_ON) {
1750            error_report("current -smp configuration requires "
1751                         "Extended Interrupt Mode enabled. "
1752                         "You can add an IOMMU using: "
1753                         "-device intel-iommu,intremap=on,eim=on");
1754            exit(EXIT_FAILURE);
1755        }
1756    }
1757}
1758
1759void pc_guest_info_init(PCMachineState *pcms)
1760{
1761    int i;
1762
1763    pcms->apic_xrupt_override = kvm_allows_irq0_override();
1764    pcms->numa_nodes = nb_numa_nodes;
1765    pcms->node_mem = g_malloc0(pcms->numa_nodes *
1766                                    sizeof *pcms->node_mem);
1767    for (i = 0; i < nb_numa_nodes; i++) {
1768        pcms->node_mem[i] = numa_info[i].node_mem;
1769    }
1770
1771    pcms->machine_done.notify = pc_machine_done;
1772    qemu_add_machine_init_done_notifier(&pcms->machine_done);
1773}
1774
1775/* setup pci memory address space mapping into system address space */
1776void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
1777                            MemoryRegion *pci_address_space)
1778{
1779    /* Set to lower priority than RAM */
1780    memory_region_add_subregion_overlap(system_memory, 0x0,
1781                                        pci_address_space, -1);
1782}
1783
1784void xen_load_linux(PCMachineState *pcms)
1785{
1786    int i;
1787    FWCfgState *fw_cfg;
1788
1789    assert(MACHINE(pcms)->kernel_filename != NULL);
1790
1791    fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE);
1792    fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
1793    rom_set_fw(fw_cfg);
1794
1795    load_linux(pcms, fw_cfg);
1796    for (i = 0; i < nb_option_roms; i++) {
1797        assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
1798               !strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
1799               !strcmp(option_rom[i].name, "pvh.bin") ||
1800               !strcmp(option_rom[i].name, "multiboot.bin"));
1801        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
1802    }
1803    pcms->fw_cfg = fw_cfg;
1804}
1805
1806void pc_memory_init(PCMachineState *pcms,
1807                    MemoryRegion *system_memory,
1808                    MemoryRegion *rom_memory,
1809                    MemoryRegion **ram_memory)
1810{
1811    int linux_boot, i;
1812    MemoryRegion *ram, *option_rom_mr;
1813    MemoryRegion *ram_below_4g, *ram_above_4g;
1814    FWCfgState *fw_cfg;
1815    MachineState *machine = MACHINE(pcms);
1816    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
1817
1818    assert(machine->ram_size == pcms->below_4g_mem_size +
1819                                pcms->above_4g_mem_size);
1820
1821    linux_boot = (machine->kernel_filename != NULL);
1822
1823    /* Allocate RAM.  We allocate it as a single memory region and use
1824     * aliases to address portions of it, mostly for backwards compatibility
1825     * with older qemus that used qemu_ram_alloc().
1826     */
1827    ram = g_malloc(sizeof(*ram));
1828    memory_region_allocate_system_memory(ram, NULL, "pc.ram",
1829                                         machine->ram_size);
1830    *ram_memory = ram;
1831    ram_below_4g = g_malloc(sizeof(*ram_below_4g));
1832    memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
1833                             0, pcms->below_4g_mem_size);
1834    memory_region_add_subregion(system_memory, 0, ram_below_4g);
1835    e820_add_entry(0, pcms->below_4g_mem_size, E820_RAM);
1836    if (pcms->above_4g_mem_size > 0) {
1837        ram_above_4g = g_malloc(sizeof(*ram_above_4g));
1838        memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
1839                                 pcms->below_4g_mem_size,
1840                                 pcms->above_4g_mem_size);
1841        memory_region_add_subregion(system_memory, 0x100000000ULL,
1842                                    ram_above_4g);
1843        e820_add_entry(0x100000000ULL, pcms->above_4g_mem_size, E820_RAM);
1844    }
1845
1846    if (!pcmc->has_reserved_memory &&
1847        (machine->ram_slots ||
1848         (machine->maxram_size > machine->ram_size))) {
1849        MachineClass *mc = MACHINE_GET_CLASS(machine);
1850
1851        error_report("\"-memory 'slots|maxmem'\" is not supported by: %s",
1852                     mc->name);
1853        exit(EXIT_FAILURE);
1854    }
1855
1856    /* always allocate the device memory information */
1857    machine->device_memory = g_malloc0(sizeof(*machine->device_memory));
1858
1859    /* initialize device memory address space */
1860    if (pcmc->has_reserved_memory &&
1861        (machine->ram_size < machine->maxram_size)) {
1862        ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
1863
1864        if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
1865            error_report("unsupported amount of memory slots: %"PRIu64,
1866                         machine->ram_slots);
1867            exit(EXIT_FAILURE);
1868        }
1869
1870        if (QEMU_ALIGN_UP(machine->maxram_size,
1871                          TARGET_PAGE_SIZE) != machine->maxram_size) {
1872            error_report("maximum memory size must by aligned to multiple of "
1873                         "%d bytes", TARGET_PAGE_SIZE);
1874            exit(EXIT_FAILURE);
1875        }
1876
1877        machine->device_memory->base =
1878            ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 1 * GiB);
1879
1880        if (pcmc->enforce_aligned_dimm) {
1881            /* size device region assuming 1G page max alignment per slot */
1882            device_mem_size += (1 * GiB) * machine->ram_slots;
1883        }
1884
1885        if ((machine->device_memory->base + device_mem_size) <
1886            device_mem_size) {
1887            error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
1888                         machine->maxram_size);
1889            exit(EXIT_FAILURE);
1890        }
1891
1892        memory_region_init(&machine->device_memory->mr, OBJECT(pcms),
1893                           "device-memory", device_mem_size);
1894        memory_region_add_subregion(system_memory, machine->device_memory->base,
1895                                    &machine->device_memory->mr);
1896    }
1897
1898    /* Initialize PC system firmware */
1899    pc_system_firmware_init(pcms, rom_memory);
1900
1901    option_rom_mr = g_malloc(sizeof(*option_rom_mr));
1902    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
1903                           &error_fatal);
1904    if (pcmc->pci_enabled) {
1905        memory_region_set_readonly(option_rom_mr, true);
1906    }
1907    memory_region_add_subregion_overlap(rom_memory,
1908                                        PC_ROM_MIN_VGA,
1909                                        option_rom_mr,
1910                                        1);
1911
1912    fw_cfg = bochs_bios_init(&address_space_memory, pcms);
1913
1914    rom_set_fw(fw_cfg);
1915
1916    if (pcmc->has_reserved_memory && machine->device_memory->base) {
1917        uint64_t *val = g_malloc(sizeof(*val));
1918        PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
1919        uint64_t res_mem_end = machine->device_memory->base;
1920
1921        if (!pcmc->broken_reserved_end) {
1922            res_mem_end += memory_region_size(&machine->device_memory->mr);
1923        }
1924        *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
1925        fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
1926    }
1927
1928    if (linux_boot) {
1929        load_linux(pcms, fw_cfg);
1930    }
1931
1932    for (i = 0; i < nb_option_roms; i++) {
1933        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
1934    }
1935    pcms->fw_cfg = fw_cfg;
1936
1937    /* Init default IOAPIC address space */
1938    pcms->ioapic_as = &address_space_memory;
1939}
1940
1941/*
1942 * The 64bit pci hole starts after "above 4G RAM" and
1943 * potentially the space reserved for memory hotplug.
1944 */
1945uint64_t pc_pci_hole64_start(void)
1946{
1947    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
1948    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
1949    MachineState *ms = MACHINE(pcms);
1950    uint64_t hole64_start = 0;
1951
1952    if (pcmc->has_reserved_memory && ms->device_memory->base) {
1953        hole64_start = ms->device_memory->base;
1954        if (!pcmc->broken_reserved_end) {
1955            hole64_start += memory_region_size(&ms->device_memory->mr);
1956        }
1957    } else {
1958        hole64_start = 0x100000000ULL + pcms->above_4g_mem_size;
1959    }
1960
1961    return ROUND_UP(hole64_start, 1 * GiB);
1962}
1963
1964qemu_irq pc_allocate_cpu_irq(void)
1965{
1966    return qemu_allocate_irq(pic_irq_request, NULL, 0);
1967}
1968
1969DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
1970{
1971    DeviceState *dev = NULL;
1972
1973    rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA);
1974    if (pci_bus) {
1975        PCIDevice *pcidev = pci_vga_init(pci_bus);
1976        dev = pcidev ? &pcidev->qdev : NULL;
1977    } else if (isa_bus) {
1978        ISADevice *isadev = isa_vga_init(isa_bus);
1979        dev = isadev ? DEVICE(isadev) : NULL;
1980    }
1981    rom_reset_order_override();
1982    return dev;
1983}
1984
1985static const MemoryRegionOps ioport80_io_ops = {
1986    .write = ioport80_write,
1987    .read = ioport80_read,
1988    .endianness = DEVICE_NATIVE_ENDIAN,
1989    .impl = {
1990        .min_access_size = 1,
1991        .max_access_size = 1,
1992    },
1993};
1994
1995static const MemoryRegionOps ioportF0_io_ops = {
1996    .write = ioportF0_write,
1997    .read = ioportF0_read,
1998    .endianness = DEVICE_NATIVE_ENDIAN,
1999    .impl = {
2000        .min_access_size = 1,
2001        .max_access_size = 1,
2002    },
2003};
2004
2005static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl, bool no_vmport)
2006{
2007    int i;
2008    DriveInfo *fd[MAX_FD];
2009    qemu_irq *a20_line;
2010    ISADevice *i8042, *port92, *vmmouse;
2011
2012    serial_hds_isa_init(isa_bus, 0, MAX_ISA_SERIAL_PORTS);
2013    parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS);
2014
2015    for (i = 0; i < MAX_FD; i++) {
2016        fd[i] = drive_get(IF_FLOPPY, 0, i);
2017        create_fdctrl |= !!fd[i];
2018    }
2019    if (create_fdctrl) {
2020        fdctrl_init_isa(isa_bus, fd);
2021    }
2022
2023    i8042 = isa_create_simple(isa_bus, "i8042");
2024    if (!no_vmport) {
2025        vmport_init(isa_bus);
2026        vmmouse = isa_try_create(isa_bus, "vmmouse");
2027    } else {
2028        vmmouse = NULL;
2029    }
2030    if (vmmouse) {
2031        DeviceState *dev = DEVICE(vmmouse);
2032        qdev_prop_set_ptr(dev, "ps2_mouse", i8042);
2033        qdev_init_nofail(dev);
2034    }
2035    port92 = isa_create_simple(isa_bus, "port92");
2036
2037    a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
2038    i8042_setup_a20_line(i8042, a20_line[0]);
2039    port92_init(port92, a20_line[1]);
2040    g_free(a20_line);
2041}
2042
2043void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
2044                          ISADevice **rtc_state,
2045                          bool create_fdctrl,
2046                          bool no_vmport,
2047                          bool has_pit,
2048                          uint32_t hpet_irqs)
2049{
2050    int i;
2051    DeviceState *hpet = NULL;
2052    int pit_isa_irq = 0;
2053    qemu_irq pit_alt_irq = NULL;
2054    qemu_irq rtc_irq = NULL;
2055    ISADevice *pit = NULL;
2056    MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
2057    MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
2058
2059    memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1);
2060    memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
2061
2062    memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, "ioportF0", 1);
2063    memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io);
2064
2065    /*
2066     * Check if an HPET shall be created.
2067     *
2068     * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT
2069     * when the HPET wants to take over. Thus we have to disable the latter.
2070     */
2071    if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
2072        /* In order to set property, here not using sysbus_try_create_simple */
2073        hpet = qdev_try_create(NULL, TYPE_HPET);
2074        if (hpet) {
2075            /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7
2076             * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23,
2077             * IRQ8 and IRQ2.
2078             */
2079            uint8_t compat = object_property_get_uint(OBJECT(hpet),
2080                    HPET_INTCAP, NULL);
2081            if (!compat) {
2082                qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs);
2083            }
2084            qdev_init_nofail(hpet);
2085            sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
2086
2087            for (i = 0; i < GSI_NUM_PINS; i++) {
2088                sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
2089            }
2090            pit_isa_irq = -1;
2091            pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT);
2092            rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
2093        }
2094    }
2095    *rtc_state = mc146818_rtc_init(isa_bus, 2000, rtc_irq);
2096
2097    qemu_register_boot_set(pc_boot_set, *rtc_state);
2098
2099    if (!xen_enabled() && has_pit) {
2100        if (kvm_pit_in_kernel()) {
2101            pit = kvm_pit_init(isa_bus, 0x40);
2102        } else {
2103            pit = i8254_pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq);
2104        }
2105        if (hpet) {
2106            /* connect PIT to output control line of the HPET */
2107            qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0));
2108        }
2109        pcspk_init(isa_bus, pit);
2110    }
2111
2112    i8257_dma_init(isa_bus, 0);
2113
2114    /* Super I/O */
2115    pc_superio_init(isa_bus, create_fdctrl, no_vmport);
2116}
2117
2118void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
2119{
2120    int i;
2121
2122    rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC);
2123    for (i = 0; i < nb_nics; i++) {
2124        NICInfo *nd = &nd_table[i];
2125        const char *model = nd->model ? nd->model : pcmc->default_nic_model;
2126
2127        if (g_str_equal(model, "ne2k_isa")) {
2128            pc_init_ne2k_isa(isa_bus, nd);
2129        } else {
2130            pci_nic_init_nofail(nd, pci_bus, model, NULL);
2131        }
2132    }
2133    rom_reset_order_override();
2134}
2135
2136void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
2137{
2138    DeviceState *dev;
2139    SysBusDevice *d;
2140    unsigned int i;
2141
2142    if (kvm_ioapic_in_kernel()) {
2143        dev = qdev_create(NULL, TYPE_KVM_IOAPIC);
2144    } else {
2145        dev = qdev_create(NULL, TYPE_IOAPIC);
2146    }
2147    if (parent_name) {
2148        object_property_add_child(object_resolve_path(parent_name, NULL),
2149                                  "ioapic", OBJECT(dev), NULL);
2150    }
2151    qdev_init_nofail(dev);
2152    d = SYS_BUS_DEVICE(dev);
2153    sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
2154
2155    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
2156        gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
2157    }
2158}
2159
2160static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
2161                               Error **errp)
2162{
2163    const PCMachineState *pcms = PC_MACHINE(hotplug_dev);
2164    const PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
2165    const MachineState *ms = MACHINE(hotplug_dev);
2166    const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
2167    const uint64_t legacy_align = TARGET_PAGE_SIZE;
2168    Error *local_err = NULL;
2169
2170    /*
2171     * When -no-acpi is used with Q35 machine type, no ACPI is built,
2172     * but pcms->acpi_dev is still created. Check !acpi_enabled in
2173     * addition to cover this case.
2174     */
2175    if (!pcms->acpi_dev || !acpi_enabled) {
2176        error_setg(errp,
2177                   "memory hotplug is not enabled: missing acpi device or acpi disabled");
2178        return;
2179    }
2180
2181    if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
2182        error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
2183        return;
2184    }
2185
2186    hotplug_handler_pre_plug(pcms->acpi_dev, dev, &local_err);
2187    if (local_err) {
2188        error_propagate(errp, local_err);
2189        return;
2190    }
2191
2192    pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev),
2193                     pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp);
2194}
2195
2196static void pc_memory_plug(HotplugHandler *hotplug_dev,
2197                           DeviceState *dev, Error **errp)
2198{
2199    Error *local_err = NULL;
2200    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
2201    MachineState *ms = MACHINE(hotplug_dev);
2202    bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
2203
2204    pc_dimm_plug(PC_DIMM(dev), MACHINE(pcms), &local_err);
2205    if (local_err) {
2206        goto out;
2207    }
2208
2209    if (is_nvdimm) {
2210        nvdimm_plug(ms->nvdimms_state);
2211    }
2212
2213    hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
2214out:
2215    error_propagate(errp, local_err);
2216}
2217
2218static void pc_memory_unplug_request(HotplugHandler *hotplug_dev,
2219                                     DeviceState *dev, Error **errp)
2220{
2221    Error *local_err = NULL;
2222    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
2223
2224    /*
2225     * When -no-acpi is used with Q35 machine type, no ACPI is built,
2226     * but pcms->acpi_dev is still created. Check !acpi_enabled in
2227     * addition to cover this case.
2228     */
2229    if (!pcms->acpi_dev || !acpi_enabled) {
2230        error_setg(&local_err,
2231                   "memory hotplug is not enabled: missing acpi device or acpi disabled");
2232        goto out;
2233    }
2234
2235    if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
2236        error_setg(&local_err,
2237                   "nvdimm device hot unplug is not supported yet.");
2238        goto out;
2239    }
2240
2241    hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev,
2242                                   &local_err);
2243out:
2244    error_propagate(errp, local_err);
2245}
2246
2247static void pc_memory_unplug(HotplugHandler *hotplug_dev,
2248                             DeviceState *dev, Error **errp)
2249{
2250    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
2251    Error *local_err = NULL;
2252
2253    hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
2254    if (local_err) {
2255        goto out;
2256    }
2257
2258    pc_dimm_unplug(PC_DIMM(dev), MACHINE(pcms));
2259    object_property_set_bool(OBJECT(dev), false, "realized", NULL);
2260 out:
2261    error_propagate(errp, local_err);
2262}
2263
2264static int pc_apic_cmp(const void *a, const void *b)
2265{
2266   CPUArchId *apic_a = (CPUArchId *)a;
2267   CPUArchId *apic_b = (CPUArchId *)b;
2268
2269   return apic_a->arch_id - apic_b->arch_id;
2270}
2271
2272/* returns pointer to CPUArchId descriptor that matches CPU's apic_id
2273 * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no
2274 * entry corresponding to CPU's apic_id returns NULL.
2275 */
2276static CPUArchId *pc_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
2277{
2278    CPUArchId apic_id, *found_cpu;
2279
2280    apic_id.arch_id = id;
2281    found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus,
2282        ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus),
2283        pc_apic_cmp);
2284    if (found_cpu && idx) {
2285        *idx = found_cpu - ms->possible_cpus->cpus;
2286    }
2287    return found_cpu;
2288}
2289
2290static void pc_cpu_plug(HotplugHandler *hotplug_dev,
2291                        DeviceState *dev, Error **errp)
2292{
2293    CPUArchId *found_cpu;
2294    Error *local_err = NULL;
2295    X86CPU *cpu = X86_CPU(dev);
2296    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
2297
2298    if (pcms->acpi_dev) {
2299        hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
2300        if (local_err) {
2301            goto out;
2302        }
2303    }
2304
2305    /* increment the number of CPUs */
2306    pcms->boot_cpus++;
2307    if (pcms->rtc) {
2308        rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus);
2309    }
2310    if (pcms->fw_cfg) {
2311        fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
2312    }
2313
2314    found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL);
2315    found_cpu->cpu = OBJECT(dev);
2316out:
2317    error_propagate(errp, local_err);
2318}
2319static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
2320                                     DeviceState *dev, Error **errp)
2321{
2322    int idx = -1;
2323    Error *local_err = NULL;
2324    X86CPU *cpu = X86_CPU(dev);
2325    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
2326
2327    if (!pcms->acpi_dev) {
2328        error_setg(&local_err, "CPU hot unplug not supported without ACPI");
2329        goto out;
2330    }
2331
2332    pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx);
2333    assert(idx != -1);
2334    if (idx == 0) {
2335        error_setg(&local_err, "Boot CPU is unpluggable");
2336        goto out;
2337    }
2338
2339    hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev,
2340                                   &local_err);
2341    if (local_err) {
2342        goto out;
2343    }
2344
2345 out:
2346    error_propagate(errp, local_err);
2347
2348}
2349
2350static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev,
2351                             DeviceState *dev, Error **errp)
2352{
2353    CPUArchId *found_cpu;
2354    Error *local_err = NULL;
2355    X86CPU *cpu = X86_CPU(dev);
2356    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
2357
2358    hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
2359    if (local_err) {
2360        goto out;
2361    }
2362
2363    found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL);
2364    found_cpu->cpu = NULL;
2365    object_property_set_bool(OBJECT(dev), false, "realized", NULL);
2366
2367    /* decrement the number of CPUs */
2368    pcms->boot_cpus--;
2369    /* Update the number of CPUs in CMOS */
2370    rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus);
2371    fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
2372 out:
2373    error_propagate(errp, local_err);
2374}
2375
2376static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
2377                            DeviceState *dev, Error **errp)
2378{
2379    int idx;
2380    CPUState *cs;
2381    CPUArchId *cpu_slot;
2382    X86CPUTopoInfo topo;
2383    X86CPU *cpu = X86_CPU(dev);
2384    CPUX86State *env = &cpu->env;
2385    MachineState *ms = MACHINE(hotplug_dev);
2386    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
2387    unsigned int smp_cores = ms->smp.cores;
2388    unsigned int smp_threads = ms->smp.threads;
2389
2390    if(!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
2391        error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
2392                   ms->cpu_type);
2393        return;
2394    }
2395
2396    env->nr_dies = pcms->smp_dies;
2397
2398    /*
2399     * If APIC ID is not set,
2400     * set it based on socket/die/core/thread properties.
2401     */
2402    if (cpu->apic_id == UNASSIGNED_APIC_ID) {
2403        int max_socket = (ms->smp.max_cpus - 1) /
2404                                smp_threads / smp_cores / pcms->smp_dies;
2405
2406        /*
2407         * die-id was optional in QEMU 4.0 and older, so keep it optional
2408         * if there's only one die per socket.
2409         */
2410        if (cpu->die_id < 0 && pcms->smp_dies == 1) {
2411            cpu->die_id = 0;
2412        }
2413
2414        if (cpu->socket_id < 0) {
2415            error_setg(errp, "CPU socket-id is not set");
2416            return;
2417        } else if (cpu->socket_id > max_socket) {
2418            error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u",
2419                       cpu->socket_id, max_socket);
2420            return;
2421        } else if (cpu->die_id > pcms->smp_dies - 1) {
2422            error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u",
2423                       cpu->die_id, max_socket);
2424            return;
2425        }
2426        if (cpu->core_id < 0) {
2427            error_setg(errp, "CPU core-id is not set");
2428            return;
2429        } else if (cpu->core_id > (smp_cores - 1)) {
2430            error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u",
2431                       cpu->core_id, smp_cores - 1);
2432            return;
2433        }
2434        if (cpu->thread_id < 0) {
2435            error_setg(errp, "CPU thread-id is not set");
2436            return;
2437        } else if (cpu->thread_id > (smp_threads - 1)) {
2438            error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u",
2439                       cpu->thread_id, smp_threads - 1);
2440            return;
2441        }
2442
2443        topo.pkg_id = cpu->socket_id;
2444        topo.die_id = cpu->die_id;
2445        topo.core_id = cpu->core_id;
2446        topo.smt_id = cpu->thread_id;
2447        cpu->apic_id = apicid_from_topo_ids(pcms->smp_dies, smp_cores,
2448                                            smp_threads, &topo);
2449    }
2450
2451    cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx);
2452    if (!cpu_slot) {
2453        MachineState *ms = MACHINE(pcms);
2454
2455        x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies,
2456                                 smp_cores, smp_threads, &topo);
2457        error_setg(errp,
2458            "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with"
2459            " APIC ID %" PRIu32 ", valid index range 0:%d",
2460            topo.pkg_id, topo.die_id, topo.core_id, topo.smt_id,
2461            cpu->apic_id, ms->possible_cpus->len - 1);
2462        return;
2463    }
2464
2465    if (cpu_slot->cpu) {
2466        error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists",
2467                   idx, cpu->apic_id);
2468        return;
2469    }
2470
2471    /* if 'address' properties socket-id/core-id/thread-id are not set, set them
2472     * so that machine_query_hotpluggable_cpus would show correct values
2473     */
2474    /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn()
2475     * once -smp refactoring is complete and there will be CPU private
2476     * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */
2477    x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies,
2478                             smp_cores, smp_threads, &topo);
2479    if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) {
2480        error_setg(errp, "property socket-id: %u doesn't match set apic-id:"
2481            " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, topo.pkg_id);
2482        return;
2483    }
2484    cpu->socket_id = topo.pkg_id;
2485
2486    if (cpu->die_id != -1 && cpu->die_id != topo.die_id) {
2487        error_setg(errp, "property die-id: %u doesn't match set apic-id:"
2488            " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo.die_id);
2489        return;
2490    }
2491    cpu->die_id = topo.die_id;
2492
2493    if (cpu->core_id != -1 && cpu->core_id != topo.core_id) {
2494        error_setg(errp, "property core-id: %u doesn't match set apic-id:"
2495            " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, topo.core_id);
2496        return;
2497    }
2498    cpu->core_id = topo.core_id;
2499
2500    if (cpu->thread_id != -1 && cpu->thread_id != topo.smt_id) {
2501        error_setg(errp, "property thread-id: %u doesn't match set apic-id:"
2502            " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, topo.smt_id);
2503        return;
2504    }
2505    cpu->thread_id = topo.smt_id;
2506
2507    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) &&
2508        !kvm_hv_vpindex_settable()) {
2509        error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX");
2510        return;
2511    }
2512
2513    cs = CPU(cpu);
2514    cs->cpu_index = idx;
2515
2516    numa_cpu_pre_plug(cpu_slot, dev, errp);
2517}
2518
2519static void pc_virtio_pmem_pci_pre_plug(HotplugHandler *hotplug_dev,
2520                                        DeviceState *dev, Error **errp)
2521{
2522    HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
2523    Error *local_err = NULL;
2524
2525    if (!hotplug_dev2) {
2526        /*
2527         * Without a bus hotplug handler, we cannot control the plug/unplug
2528         * order. This should never be the case on x86, however better add
2529         * a safety net.
2530         */
2531        error_setg(errp, "virtio-pmem-pci not supported on this bus.");
2532        return;
2533    }
2534    /*
2535     * First, see if we can plug this memory device at all. If that
2536     * succeeds, branch of to the actual hotplug handler.
2537     */
2538    memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL,
2539                           &local_err);
2540    if (!local_err) {
2541        hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err);
2542    }
2543    error_propagate(errp, local_err);
2544}
2545
2546static void pc_virtio_pmem_pci_plug(HotplugHandler *hotplug_dev,
2547                                    DeviceState *dev, Error **errp)
2548{
2549    HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
2550    Error *local_err = NULL;
2551
2552    /*
2553     * Plug the memory device first and then branch off to the actual
2554     * hotplug handler. If that one fails, we can easily undo the memory
2555     * device bits.
2556     */
2557    memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
2558    hotplug_handler_plug(hotplug_dev2, dev, &local_err);
2559    if (local_err) {
2560        memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
2561    }
2562    error_propagate(errp, local_err);
2563}
2564
2565static void pc_virtio_pmem_pci_unplug_request(HotplugHandler *hotplug_dev,
2566                                              DeviceState *dev, Error **errp)
2567{
2568    /* We don't support virtio pmem hot unplug */
2569    error_setg(errp, "virtio pmem device unplug not supported.");
2570}
2571
2572static void pc_virtio_pmem_pci_unplug(HotplugHandler *hotplug_dev,
2573                                      DeviceState *dev, Error **errp)
2574{
2575    /* We don't support virtio pmem hot unplug */
2576}
2577
2578static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
2579                                          DeviceState *dev, Error **errp)
2580{
2581    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
2582        pc_memory_pre_plug(hotplug_dev, dev, errp);
2583    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
2584        pc_cpu_pre_plug(hotplug_dev, dev, errp);
2585    } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
2586        pc_virtio_pmem_pci_pre_plug(hotplug_dev, dev, errp);
2587    }
2588}
2589
2590static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
2591                                      DeviceState *dev, Error **errp)
2592{
2593    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
2594        pc_memory_plug(hotplug_dev, dev, errp);
2595    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
2596        pc_cpu_plug(hotplug_dev, dev, errp);
2597    } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
2598        pc_virtio_pmem_pci_plug(hotplug_dev, dev, errp);
2599    }
2600}
2601
2602static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
2603                                                DeviceState *dev, Error **errp)
2604{
2605    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
2606        pc_memory_unplug_request(hotplug_dev, dev, errp);
2607    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
2608        pc_cpu_unplug_request_cb(hotplug_dev, dev, errp);
2609    } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
2610        pc_virtio_pmem_pci_unplug_request(hotplug_dev, dev, errp);
2611    } else {
2612        error_setg(errp, "acpi: device unplug request for not supported device"
2613                   " type: %s", object_get_typename(OBJECT(dev)));
2614    }
2615}
2616
2617static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
2618                                        DeviceState *dev, Error **errp)
2619{
2620    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
2621        pc_memory_unplug(hotplug_dev, dev, errp);
2622    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
2623        pc_cpu_unplug_cb(hotplug_dev, dev, errp);
2624    } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
2625        pc_virtio_pmem_pci_unplug(hotplug_dev, dev, errp);
2626    } else {
2627        error_setg(errp, "acpi: device unplug for not supported device"
2628                   " type: %s", object_get_typename(OBJECT(dev)));
2629    }
2630}
2631
2632static HotplugHandler *pc_get_hotplug_handler(MachineState *machine,
2633                                             DeviceState *dev)
2634{
2635    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
2636        object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
2637        object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
2638        return HOTPLUG_HANDLER(machine);
2639    }
2640
2641    return NULL;
2642}
2643
2644static void
2645pc_machine_get_device_memory_region_size(Object *obj, Visitor *v,
2646                                         const char *name, void *opaque,
2647                                         Error **errp)
2648{
2649    MachineState *ms = MACHINE(obj);
2650    int64_t value = 0;
2651
2652    if (ms->device_memory) {
2653        value = memory_region_size(&ms->device_memory->mr);
2654    }
2655
2656    visit_type_int(v, name, &value, errp);
2657}
2658
2659static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
2660                                            const char *name, void *opaque,
2661                                            Error **errp)
2662{
2663    PCMachineState *pcms = PC_MACHINE(obj);
2664    uint64_t value = pcms->max_ram_below_4g;
2665
2666    visit_type_size(v, name, &value, errp);
2667}
2668
2669static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
2670                                            const char *name, void *opaque,
2671                                            Error **errp)
2672{
2673    PCMachineState *pcms = PC_MACHINE(obj);
2674    Error *error = NULL;
2675    uint64_t value;
2676
2677    visit_type_size(v, name, &value, &error);
2678    if (error) {
2679        error_propagate(errp, error);
2680        return;
2681    }
2682    if (value > 4 * GiB) {
2683        error_setg(&error,
2684                   "Machine option 'max-ram-below-4g=%"PRIu64
2685                   "' expects size less than or equal to 4G", value);
2686        error_propagate(errp, error);
2687        return;
2688    }
2689
2690    if (value < 1 * MiB) {
2691        warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary,"
2692                    "BIOS may not work with less than 1MiB", value);
2693    }
2694
2695    pcms->max_ram_below_4g = value;
2696}
2697
2698static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name,
2699                                  void *opaque, Error **errp)
2700{
2701    PCMachineState *pcms = PC_MACHINE(obj);
2702    OnOffAuto vmport = pcms->vmport;
2703
2704    visit_type_OnOffAuto(v, name, &vmport, errp);
2705}
2706
2707static void pc_machine_set_vmport(Object *obj, Visitor *v, const char *name,
2708                                  void *opaque, Error **errp)
2709{
2710    PCMachineState *pcms = PC_MACHINE(obj);
2711
2712    visit_type_OnOffAuto(v, name, &pcms->vmport, errp);
2713}
2714
2715bool pc_machine_is_smm_enabled(PCMachineState *pcms)
2716{
2717    bool smm_available = false;
2718
2719    if (pcms->smm == ON_OFF_AUTO_OFF) {
2720        return false;
2721    }
2722
2723    if (tcg_enabled() || qtest_enabled()) {
2724        smm_available = true;
2725    } else if (kvm_enabled()) {
2726        smm_available = kvm_has_smm();
2727    }
2728
2729    if (smm_available) {
2730        return true;
2731    }
2732
2733    if (pcms->smm == ON_OFF_AUTO_ON) {
2734        error_report("System Management Mode not supported by this hypervisor.");
2735        exit(1);
2736    }
2737    return false;
2738}
2739
2740static void pc_machine_get_smm(Object *obj, Visitor *v, const char *name,
2741                               void *opaque, Error **errp)
2742{
2743    PCMachineState *pcms = PC_MACHINE(obj);
2744    OnOffAuto smm = pcms->smm;
2745
2746    visit_type_OnOffAuto(v, name, &smm, errp);
2747}
2748
2749static void pc_machine_set_smm(Object *obj, Visitor *v, const char *name,
2750                               void *opaque, Error **errp)
2751{
2752    PCMachineState *pcms = PC_MACHINE(obj);
2753
2754    visit_type_OnOffAuto(v, name, &pcms->smm, errp);
2755}
2756
2757static bool pc_machine_get_smbus(Object *obj, Error **errp)
2758{
2759    PCMachineState *pcms = PC_MACHINE(obj);
2760
2761    return pcms->smbus_enabled;
2762}
2763
2764static void pc_machine_set_smbus(Object *obj, bool value, Error **errp)
2765{
2766    PCMachineState *pcms = PC_MACHINE(obj);
2767
2768    pcms->smbus_enabled = value;
2769}
2770
2771static bool pc_machine_get_sata(Object *obj, Error **errp)
2772{
2773    PCMachineState *pcms = PC_MACHINE(obj);
2774
2775    return pcms->sata_enabled;
2776}
2777
2778static void pc_machine_set_sata(Object *obj, bool value, Error **errp)
2779{
2780    PCMachineState *pcms = PC_MACHINE(obj);
2781
2782    pcms->sata_enabled = value;
2783}
2784
2785static bool pc_machine_get_pit(Object *obj, Error **errp)
2786{
2787    PCMachineState *pcms = PC_MACHINE(obj);
2788
2789    return pcms->pit_enabled;
2790}
2791
2792static void pc_machine_set_pit(Object *obj, bool value, Error **errp)
2793{
2794    PCMachineState *pcms = PC_MACHINE(obj);
2795
2796    pcms->pit_enabled = value;
2797}
2798
2799static void pc_machine_initfn(Object *obj)
2800{
2801    PCMachineState *pcms = PC_MACHINE(obj);
2802
2803    pcms->max_ram_below_4g = 0; /* use default */
2804    pcms->smm = ON_OFF_AUTO_AUTO;
2805#ifdef CONFIG_VMPORT
2806    pcms->vmport = ON_OFF_AUTO_AUTO;
2807#else
2808    pcms->vmport = ON_OFF_AUTO_OFF;
2809#endif /* CONFIG_VMPORT */
2810    /* acpi build is enabled by default if machine supports it */
2811    pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
2812    pcms->smbus_enabled = true;
2813    pcms->sata_enabled = true;
2814    pcms->pit_enabled = true;
2815    pcms->smp_dies = 1;
2816
2817    pc_system_flash_create(pcms);
2818}
2819
2820static void pc_machine_reset(MachineState *machine)
2821{
2822    CPUState *cs;
2823    X86CPU *cpu;
2824
2825    qemu_devices_reset();
2826
2827    /* Reset APIC after devices have been reset to cancel
2828     * any changes that qemu_devices_reset() might have done.
2829     */
2830    CPU_FOREACH(cs) {
2831        cpu = X86_CPU(cs);
2832
2833        if (cpu->apic_state) {
2834            device_reset(cpu->apic_state);
2835        }
2836    }
2837}
2838
2839static CpuInstanceProperties
2840pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
2841{
2842    MachineClass *mc = MACHINE_GET_CLASS(ms);
2843    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
2844
2845    assert(cpu_index < possible_cpus->len);
2846    return possible_cpus->cpus[cpu_index].props;
2847}
2848
2849static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx)
2850{
2851   X86CPUTopoInfo topo;
2852   PCMachineState *pcms = PC_MACHINE(ms);
2853
2854   assert(idx < ms->possible_cpus->len);
2855   x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id,
2856                            pcms->smp_dies, ms->smp.cores,
2857                            ms->smp.threads, &topo);
2858   return topo.pkg_id % nb_numa_nodes;
2859}
2860
2861static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
2862{
2863    PCMachineState *pcms = PC_MACHINE(ms);
2864    int i;
2865    unsigned int max_cpus = ms->smp.max_cpus;
2866
2867    if (ms->possible_cpus) {
2868        /*
2869         * make sure that max_cpus hasn't changed since the first use, i.e.
2870         * -smp hasn't been parsed after it
2871        */
2872        assert(ms->possible_cpus->len == max_cpus);
2873        return ms->possible_cpus;
2874    }
2875
2876    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
2877                                  sizeof(CPUArchId) * max_cpus);
2878    ms->possible_cpus->len = max_cpus;
2879    for (i = 0; i < ms->possible_cpus->len; i++) {
2880        X86CPUTopoInfo topo;
2881
2882        ms->possible_cpus->cpus[i].type = ms->cpu_type;
2883        ms->possible_cpus->cpus[i].vcpus_count = 1;
2884        ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i);
2885        x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id,
2886                                 pcms->smp_dies, ms->smp.cores,
2887                                 ms->smp.threads, &topo);
2888        ms->possible_cpus->cpus[i].props.has_socket_id = true;
2889        ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id;
2890        if (pcms->smp_dies > 1) {
2891            ms->possible_cpus->cpus[i].props.has_die_id = true;
2892            ms->possible_cpus->cpus[i].props.die_id = topo.die_id;
2893        }
2894        ms->possible_cpus->cpus[i].props.has_core_id = true;
2895        ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
2896        ms->possible_cpus->cpus[i].props.has_thread_id = true;
2897        ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id;
2898    }
2899    return ms->possible_cpus;
2900}
2901
2902static void x86_nmi(NMIState *n, int cpu_index, Error **errp)
2903{
2904    /* cpu index isn't used */
2905    CPUState *cs;
2906
2907    CPU_FOREACH(cs) {
2908        X86CPU *cpu = X86_CPU(cs);
2909
2910        if (!cpu->apic_state) {
2911            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
2912        } else {
2913            apic_deliver_nmi(cpu->apic_state);
2914        }
2915    }
2916}
2917
2918static void pc_machine_class_init(ObjectClass *oc, void *data)
2919{
2920    MachineClass *mc = MACHINE_CLASS(oc);
2921    PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
2922    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
2923    NMIClass *nc = NMI_CLASS(oc);
2924
2925    pcmc->pci_enabled = true;
2926    pcmc->has_acpi_build = true;
2927    pcmc->rsdp_in_ram = true;
2928    pcmc->smbios_defaults = true;
2929    pcmc->smbios_uuid_encoded = true;
2930    pcmc->gigabyte_align = true;
2931    pcmc->has_reserved_memory = true;
2932    pcmc->kvmclock_enabled = true;
2933    pcmc->enforce_aligned_dimm = true;
2934    /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
2935     * to be used at the moment, 32K should be enough for a while.  */
2936    pcmc->acpi_data_size = 0x20000 + 0x8000;
2937    pcmc->save_tsc_khz = true;
2938    pcmc->linuxboot_dma_enabled = true;
2939    pcmc->pvh_enabled = true;
2940    assert(!mc->get_hotplug_handler);
2941    mc->get_hotplug_handler = pc_get_hotplug_handler;
2942    mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
2943    mc->get_default_cpu_node_id = pc_get_default_cpu_node_id;
2944    mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
2945    mc->auto_enable_numa_with_memhp = true;
2946    mc->has_hotpluggable_cpus = true;
2947    mc->default_boot_order = "cad";
2948    mc->hot_add_cpu = pc_hot_add_cpu;
2949    mc->smp_parse = pc_smp_parse;
2950    mc->block_default_type = IF_IDE;
2951    mc->max_cpus = 255;
2952    mc->reset = pc_machine_reset;
2953    hc->pre_plug = pc_machine_device_pre_plug_cb;
2954    hc->plug = pc_machine_device_plug_cb;
2955    hc->unplug_request = pc_machine_device_unplug_request_cb;
2956    hc->unplug = pc_machine_device_unplug_cb;
2957    nc->nmi_monitor_handler = x86_nmi;
2958    mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE;
2959    mc->nvdimm_supported = true;
2960    mc->numa_mem_supported = true;
2961
2962    object_class_property_add(oc, PC_MACHINE_DEVMEM_REGION_SIZE, "int",
2963        pc_machine_get_device_memory_region_size, NULL,
2964        NULL, NULL, &error_abort);
2965
2966    object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
2967        pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g,
2968        NULL, NULL, &error_abort);
2969
2970    object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G,
2971        "Maximum ram below the 4G boundary (32bit boundary)", &error_abort);
2972
2973    object_class_property_add(oc, PC_MACHINE_SMM, "OnOffAuto",
2974        pc_machine_get_smm, pc_machine_set_smm,
2975        NULL, NULL, &error_abort);
2976    object_class_property_set_description(oc, PC_MACHINE_SMM,
2977        "Enable SMM (pc & q35)", &error_abort);
2978
2979    object_class_property_add(oc, PC_MACHINE_VMPORT, "OnOffAuto",
2980        pc_machine_get_vmport, pc_machine_set_vmport,
2981        NULL, NULL, &error_abort);
2982    object_class_property_set_description(oc, PC_MACHINE_VMPORT,
2983        "Enable vmport (pc & q35)", &error_abort);
2984
2985    object_class_property_add_bool(oc, PC_MACHINE_SMBUS,
2986        pc_machine_get_smbus, pc_machine_set_smbus, &error_abort);
2987
2988    object_class_property_add_bool(oc, PC_MACHINE_SATA,
2989        pc_machine_get_sata, pc_machine_set_sata, &error_abort);
2990
2991    object_class_property_add_bool(oc, PC_MACHINE_PIT,
2992        pc_machine_get_pit, pc_machine_set_pit, &error_abort);
2993}
2994
2995static const TypeInfo pc_machine_info = {
2996    .name = TYPE_PC_MACHINE,
2997    .parent = TYPE_MACHINE,
2998    .abstract = true,
2999    .instance_size = sizeof(PCMachineState),
3000    .instance_init = pc_machine_initfn,
3001    .class_size = sizeof(PCMachineClass),
3002    .class_init = pc_machine_class_init,
3003    .interfaces = (InterfaceInfo[]) {
3004         { TYPE_HOTPLUG_HANDLER },
3005         { TYPE_NMI },
3006         { }
3007    },
3008};
3009
3010static void pc_machine_register_types(void)
3011{
3012    type_register_static(&pc_machine_info);
3013}
3014
3015type_init(pc_machine_register_types)
3016