qemu/target/ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qapi/error.h"
  26#include "qemu/error-report.h"
  27#include "cpu.h"
  28#include "cpu-models.h"
  29#include "qemu/timer.h"
  30#include "sysemu/sysemu.h"
  31#include "sysemu/hw_accel.h"
  32#include "kvm_ppc.h"
  33#include "sysemu/cpus.h"
  34#include "sysemu/device_tree.h"
  35#include "mmu-hash64.h"
  36
  37#include "hw/sysbus.h"
  38#include "hw/ppc/spapr.h"
  39#include "hw/ppc/spapr_vio.h"
  40#include "hw/ppc/spapr_cpu_core.h"
  41#include "hw/ppc/ppc.h"
  42#include "sysemu/watchdog.h"
  43#include "trace.h"
  44#include "exec/gdbstub.h"
  45#include "exec/memattrs.h"
  46#include "exec/ram_addr.h"
  47#include "sysemu/hostmem.h"
  48#include "qemu/cutils.h"
  49#include "qemu/mmap-alloc.h"
  50#include "elf.h"
  51#include "sysemu/kvm_int.h"
  52
  53//#define DEBUG_KVM
  54
  55#ifdef DEBUG_KVM
  56#define DPRINTF(fmt, ...) \
  57    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  58#else
  59#define DPRINTF(fmt, ...) \
  60    do { } while (0)
  61#endif
  62
  63#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  64
  65const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  66    KVM_CAP_LAST_INFO
  67};
  68
  69static int cap_interrupt_unset = false;
  70static int cap_interrupt_level = false;
  71static int cap_segstate;
  72static int cap_booke_sregs;
  73static int cap_ppc_smt;
  74static int cap_ppc_smt_possible;
  75static int cap_spapr_tce;
  76static int cap_spapr_tce_64;
  77static int cap_spapr_multitce;
  78static int cap_spapr_vfio;
  79static int cap_hior;
  80static int cap_one_reg;
  81static int cap_epr;
  82static int cap_ppc_watchdog;
  83static int cap_papr;
  84static int cap_htab_fd;
  85static int cap_fixup_hcalls;
  86static int cap_htm;             /* Hardware transactional memory support */
  87static int cap_mmu_radix;
  88static int cap_mmu_hash_v3;
  89static int cap_resize_hpt;
  90static int cap_ppc_pvr_compat;
  91static int cap_ppc_safe_cache;
  92static int cap_ppc_safe_bounds_check;
  93static int cap_ppc_safe_indirect_branch;
  94
  95static uint32_t debug_inst_opcode;
  96
  97/* XXX We have a race condition where we actually have a level triggered
  98 *     interrupt, but the infrastructure can't expose that yet, so the guest
  99 *     takes but ignores it, goes to sleep and never gets notified that there's
 100 *     still an interrupt pending.
 101 *
 102 *     As a quick workaround, let's just wake up again 20 ms after we injected
  103 * an interrupt. That way we can ensure that we're always reinjecting
 104 *     interrupts in case the guest swallowed them.
 105 */
 106static QEMUTimer *idle_timer;
 107
 108static void kvm_kick_cpu(void *opaque)
 109{
 110    PowerPCCPU *cpu = opaque;
 111
 112    qemu_cpu_kick(CPU(cpu));
 113}
 114
 115/* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 116 * should only be used for fallback tests - generally we should use
 117 * explicit capabilities for the features we want, rather than
 118 * assuming what is/isn't available depending on the KVM variant. */
 119static bool kvmppc_is_pr(KVMState *ks)
 120{
 121    /* Assume KVM-PR if the GET_PVINFO capability is available */
 122    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 123}
 124
 125static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 126static void kvmppc_get_cpu_characteristics(KVMState *s);
 127
 128int kvm_arch_init(MachineState *ms, KVMState *s)
 129{
 130    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 131    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 132    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 133    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 134    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 135    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 136    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 137    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 138    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
 139    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 140    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 141    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 142    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 143    /* Note: we don't set cap_papr here, because this capability is
  144     * only activated later, by kvmppc_set_papr() */
 145    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 146    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 147    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 148    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 149    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 150    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 151    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 152    kvmppc_get_cpu_characteristics(s);
 153    /*
  154     * Note: setting it to false because there is no such capability
  155     * in KVM at the moment.
  156     *
  157     * TODO: call kvm_vm_check_extension() with the right capability
  158     * after the kernel starts implementing it. */
 159    cap_ppc_pvr_compat = false;
 160
 161    if (!cap_interrupt_level) {
 162        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 163                        "VM to stall at times!\n");
 164    }
 165
 166    kvm_ppc_register_host_cpu_type(ms);
 167
 168    return 0;
 169}
 170
 171int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 172{
 173    return 0;
 174}
 175
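     /* Push the initial special registers (notably the PVR) to KVM.  On
      * BookE the in-kernel PVR is left alone; on Book3S this requires the
      * KVM_CAP_PPC_SEGSTATE capability. */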
 176static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 177{
 178    CPUPPCState *cenv = &cpu->env;
 179    CPUState *cs = CPU(cpu);
 180    struct kvm_sregs sregs;
 181    int ret;
 182
 183    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 184        /* What we're really trying to say is "if we're on BookE, we use
 185           the native PVR for now". This is the only sane way to check
  186           it though, so we potentially confuse users into thinking they
  187           can run BookE guests on BookS. Let's hope nobody dares try :) */
 188        return 0;
 189    } else {
 190        if (!cap_segstate) {
 191            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 192            return -ENOSYS;
 193        }
 194    }
 195
 196    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 197    if (ret) {
 198        return ret;
 199    }
 200
 201    sregs.pvr = cenv->spr[SPR_PVR];
 202    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 203}
 204
 205/* Set up a shared TLB array with KVM */
 206static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 207{
 208    CPUPPCState *env = &cpu->env;
 209    CPUState *cs = CPU(cpu);
 210    struct kvm_book3e_206_tlb_params params = {};
 211    struct kvm_config_tlb cfg = {};
 212    unsigned int entries = 0;
 213    int ret, i;
 214
 215    if (!kvm_enabled() ||
 216        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 217        return 0;
 218    }
 219
 220    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 221
 222    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 223        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 224        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 225        entries += params.tlb_sizes[i];
 226    }
 227
 228    assert(entries == env->nb_tlb);
 229    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 230
 231    env->tlb_dirty = true;
 232
 233    cfg.array = (uintptr_t)env->tlb.tlbm;
 234    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 235    cfg.params = (uintptr_t)&params;
 236    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 237
 238    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 239    if (ret < 0) {
 240        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 241                __func__, strerror(-ret));
 242        return ret;
 243    }
 244
 245    env->kvm_sw_tlb = true;
 246    return 0;
 247}
 248
 249
 250#if defined(TARGET_PPC64)
 251static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
 252{
 253    int ret;
 254
 255    assert(kvm_state != NULL);
 256
 257    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 258        error_setg(errp, "KVM doesn't expose the MMU features it supports");
 259        error_append_hint(errp, "Consider switching to a newer KVM\n");
 260        return;
 261    }
 262
 263    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 264    if (ret == 0) {
 265        return;
 266    }
 267
 268    error_setg_errno(errp, -ret,
 269                     "KVM failed to provide the MMU features it supports");
 270}
 271
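     /* Query KVM for the radix MMU page size ("AP") encodings supported by
      * the host.  Returns a freshly allocated structure, or NULL if the
      * capability or the ioctl is unavailable. */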
 272struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 273{
 274    KVMState *s = KVM_STATE(current_machine->accelerator);
 275    struct ppc_radix_page_info *radix_page_info;
 276    struct kvm_ppc_rmmu_info rmmu_info;
 277    int i;
 278
 279    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 280        return NULL;
 281    }
 282    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 283        return NULL;
 284    }
 285    radix_page_info = g_malloc0(sizeof(*radix_page_info));
 286    radix_page_info->count = 0;
 287    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 288        if (rmmu_info.ap_encodings[i]) {
 289            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 290            radix_page_info->count++;
 291        }
 292    }
 293    return radix_page_info;
 294}
 295
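     /* Configure the ISA v3.00 MMU mode for the guest: radix vs. hash,
      * whether GTSE is enabled, and the process table address.  The return
      * value is an hcall status (H_SUCCESS, H_PARAMETER, ...). */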
 296target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 297                                     bool radix, bool gtse,
 298                                     uint64_t proc_tbl)
 299{
 300    CPUState *cs = CPU(cpu);
 301    int ret;
 302    uint64_t flags = 0;
 303    struct kvm_ppc_mmuv3_cfg cfg = {
 304        .process_table = proc_tbl,
 305    };
 306
 307    if (radix) {
 308        flags |= KVM_PPC_MMUV3_RADIX;
 309    }
 310    if (gtse) {
 311        flags |= KVM_PPC_MMUV3_GTSE;
 312    }
 313    cfg.flags = flags;
 314    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 315    switch (ret) {
 316    case 0:
 317        return H_SUCCESS;
 318    case -EINVAL:
 319        return H_PARAMETER;
 320    case -ENODEV:
 321        return H_NOT_AVAILABLE;
 322    default:
 323        return H_HARDWARE;
 324    }
 325}
 326
 327bool kvmppc_hpt_needs_host_contiguous_pages(void)
 328{
 329    static struct kvm_ppc_smmu_info smmu_info;
 330
 331    if (!kvm_enabled()) {
 332        return false;
 333    }
 334
 335    kvm_get_smmu_info(&smmu_info, &error_fatal);
 336    return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
 337}
 338
 339void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
 340{
 341    struct kvm_ppc_smmu_info smmu_info;
 342    int iq, ik, jq, jk;
 343    Error *local_err = NULL;
 344
 345    /* For now, we only have anything to check on hash64 MMUs */
 346    if (!cpu->hash64_opts || !kvm_enabled()) {
 347        return;
 348    }
 349
 350    kvm_get_smmu_info(&smmu_info, &local_err);
 351    if (local_err) {
 352        error_propagate(errp, local_err);
 353        return;
 354    }
 355
 356    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
 357        && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 358        error_setg(errp,
 359                   "KVM does not support 1TiB segments which guest expects");
 360        return;
 361    }
 362
 363    if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
 364        error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
 365                   smmu_info.slb_size, cpu->hash64_opts->slb_size);
 366        return;
 367    }
 368
 369    /*
 370     * Verify that every pagesize supported by the cpu model is
 371     * supported by KVM with the same encodings
 372     */
 373    for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
 374        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
 375        struct kvm_ppc_one_seg_page_size *ksps;
 376
 377        for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
 378            if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
 379                break;
 380            }
 381        }
 382        if (ik >= ARRAY_SIZE(smmu_info.sps)) {
  383            error_setg(errp, "KVM doesn't support base page shift %u",
 384                       qsps->page_shift);
 385            return;
 386        }
 387
 388        ksps = &smmu_info.sps[ik];
 389        if (ksps->slb_enc != qsps->slb_enc) {
 390            error_setg(errp,
 391"KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
 392                       ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
 393            return;
 394        }
 395
 396        for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
 397            for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
 398                if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
 399                    break;
 400                }
 401            }
 402
 403            if (jk >= ARRAY_SIZE(ksps->enc)) {
 404                error_setg(errp, "KVM doesn't support page shift %u/%u",
 405                           qsps->enc[jq].page_shift, qsps->page_shift);
 406                return;
 407            }
 408            if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
 409                error_setg(errp,
 410"KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
 411                           ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
 412                           qsps->page_shift, qsps->enc[jq].pte_enc);
 413                return;
 414            }
 415        }
 416    }
 417
 418    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
  419        /* Mostly, the guest page sizes we can use are determined by the
  420         * host pages used to map guest RAM, which is handled in the
  421         * platform code. Cache-Inhibited largepages (64k), however, are
 422         * used for I/O, so if they're mapped to the host at all it
 423         * will be a normal mapping, not a special hugepage one used
 424         * for RAM. */
 425        if (getpagesize() < 0x10000) {
 426            error_setg(errp,
 427                       "KVM can't supply 64kiB CI pages, which guest expects");
 428        }
 429    }
 430}
  431#endif /* defined (TARGET_PPC64) */
 432
 433unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 434{
 435    return POWERPC_CPU(cpu)->vcpu_id;
 436}
 437
  438/* e500 supports 2 h/w breakpoints and 2 watchpoints.
  439 * book3s supports only 1 watchpoint, so an array size
 440 * of 4 is sufficient for now.
 441 */
 442#define MAX_HW_BKPTS 4
 443
 444static struct HWBreakpoint {
 445    target_ulong addr;
 446    int type;
 447} hw_debug_points[MAX_HW_BKPTS];
 448
 449static CPUWatchpoint hw_watchpoint;
 450
  451/* By default, no hardware breakpoints or watchpoints are supported */
 452static int max_hw_breakpoint;
 453static int max_hw_watchpoint;
 454static int nb_hw_breakpoint;
 455static int nb_hw_watchpoint;
 456
 457static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 458{
 459    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 460        max_hw_breakpoint = 2;
 461        max_hw_watchpoint = 2;
 462    }
 463
 464    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 465        fprintf(stderr, "Error initializing h/w breakpoints\n");
 466        return;
 467    }
 468}
 469
 470int kvm_arch_init_vcpu(CPUState *cs)
 471{
 472    PowerPCCPU *cpu = POWERPC_CPU(cs);
 473    CPUPPCState *cenv = &cpu->env;
 474    int ret;
 475
 476    /* Synchronize sregs with kvm */
 477    ret = kvm_arch_sync_sregs(cpu);
 478    if (ret) {
 479        if (ret == -EINVAL) {
 480            error_report("Register sync failed... If you're using kvm-hv.ko,"
 481                         " only \"-cpu host\" is possible");
 482        }
 483        return ret;
 484    }
 485
 486    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 487
 488    switch (cenv->mmu_model) {
 489    case POWERPC_MMU_BOOKE206:
 490        /* This target supports access to KVM's guest TLB */
 491        ret = kvm_booke206_tlb_init(cpu);
 492        break;
 493    case POWERPC_MMU_2_07:
 494        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
  495            /* KVM-HV has transactional memory on POWER8 even without the
  496             * KVM_CAP_PPC_HTM extension, so enable it here instead as
  497             * long as it's available to userspace on the host. */
 498            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 499                cap_htm = true;
 500            }
 501        }
 502        break;
 503    default:
 504        break;
 505    }
 506
 507    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 508    kvmppc_hw_debug_points_init(cenv);
 509
 510    return ret;
 511}
 512
 513static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 514{
 515    CPUPPCState *env = &cpu->env;
 516    CPUState *cs = CPU(cpu);
 517    struct kvm_dirty_tlb dirty_tlb;
 518    unsigned char *bitmap;
 519    int ret;
 520
 521    if (!env->kvm_sw_tlb) {
 522        return;
 523    }
 524
 525    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 526    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 527
 528    dirty_tlb.bitmap = (uintptr_t)bitmap;
 529    dirty_tlb.num_dirty = env->nb_tlb;
 530
 531    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 532    if (ret) {
 533        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 534                __func__, strerror(-ret));
 535    }
 536
 537    g_free(bitmap);
 538}
 539
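     /* Read a single SPR from KVM through the ONE_REG interface and store it
      * in env->spr[].  The register size is taken from the ONE_REG id; see
      * e.g. the HIOR handling in kvm_arch_get_registers() for a caller. */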
 540static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 541{
 542    PowerPCCPU *cpu = POWERPC_CPU(cs);
 543    CPUPPCState *env = &cpu->env;
 544    union {
 545        uint32_t u32;
 546        uint64_t u64;
 547    } val;
 548    struct kvm_one_reg reg = {
 549        .id = id,
 550        .addr = (uintptr_t) &val,
 551    };
 552    int ret;
 553
 554    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 555    if (ret != 0) {
 556        trace_kvm_failed_spr_get(spr, strerror(errno));
 557    } else {
 558        switch (id & KVM_REG_SIZE_MASK) {
 559        case KVM_REG_SIZE_U32:
 560            env->spr[spr] = val.u32;
 561            break;
 562
 563        case KVM_REG_SIZE_U64:
 564            env->spr[spr] = val.u64;
 565            break;
 566
 567        default:
 568            /* Don't handle this size yet */
 569            abort();
 570        }
 571    }
 572}
 573
 574static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 575{
 576    PowerPCCPU *cpu = POWERPC_CPU(cs);
 577    CPUPPCState *env = &cpu->env;
 578    union {
 579        uint32_t u32;
 580        uint64_t u64;
 581    } val;
 582    struct kvm_one_reg reg = {
 583        .id = id,
 584        .addr = (uintptr_t) &val,
 585    };
 586    int ret;
 587
 588    switch (id & KVM_REG_SIZE_MASK) {
 589    case KVM_REG_SIZE_U32:
 590        val.u32 = env->spr[spr];
 591        break;
 592
 593    case KVM_REG_SIZE_U64:
 594        val.u64 = env->spr[spr];
 595        break;
 596
 597    default:
 598        /* Don't handle this size yet */
 599        abort();
 600    }
 601
 602    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 603    if (ret != 0) {
 604        trace_kvm_failed_spr_set(spr, strerror(errno));
 605    }
 606}
 607
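     /* Push the guest floating point, VSX and Altivec state to KVM, one
      * ONE_REG at a time.  kvm_get_fp() below is the mirror image. */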
 608static int kvm_put_fp(CPUState *cs)
 609{
 610    PowerPCCPU *cpu = POWERPC_CPU(cs);
 611    CPUPPCState *env = &cpu->env;
 612    struct kvm_one_reg reg;
 613    int i;
 614    int ret;
 615
 616    if (env->insns_flags & PPC_FLOAT) {
 617        uint64_t fpscr = env->fpscr;
 618        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 619
 620        reg.id = KVM_REG_PPC_FPSCR;
 621        reg.addr = (uintptr_t)&fpscr;
 622        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 623        if (ret < 0) {
 624            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 625            return ret;
 626        }
 627
 628        for (i = 0; i < 32; i++) {
 629            uint64_t vsr[2];
 630
 631#ifdef HOST_WORDS_BIGENDIAN
 632            vsr[0] = float64_val(env->fpr[i]);
 633            vsr[1] = env->vsr[i];
 634#else
 635            vsr[0] = env->vsr[i];
 636            vsr[1] = float64_val(env->fpr[i]);
 637#endif
 638            reg.addr = (uintptr_t) &vsr;
 639            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 640
 641            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 642            if (ret < 0) {
 643                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 644                        i, strerror(errno));
 645                return ret;
 646            }
 647        }
 648    }
 649
 650    if (env->insns_flags & PPC_ALTIVEC) {
 651        reg.id = KVM_REG_PPC_VSCR;
 652        reg.addr = (uintptr_t)&env->vscr;
 653        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 654        if (ret < 0) {
 655            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 656            return ret;
 657        }
 658
 659        for (i = 0; i < 32; i++) {
 660            reg.id = KVM_REG_PPC_VR(i);
 661            reg.addr = (uintptr_t)&env->avr[i];
 662            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 663            if (ret < 0) {
 664                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 665                return ret;
 666            }
 667        }
 668    }
 669
 670    return 0;
 671}
 672
 673static int kvm_get_fp(CPUState *cs)
 674{
 675    PowerPCCPU *cpu = POWERPC_CPU(cs);
 676    CPUPPCState *env = &cpu->env;
 677    struct kvm_one_reg reg;
 678    int i;
 679    int ret;
 680
 681    if (env->insns_flags & PPC_FLOAT) {
 682        uint64_t fpscr;
 683        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 684
 685        reg.id = KVM_REG_PPC_FPSCR;
 686        reg.addr = (uintptr_t)&fpscr;
 687        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 688        if (ret < 0) {
 689            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 690            return ret;
 691        } else {
 692            env->fpscr = fpscr;
 693        }
 694
 695        for (i = 0; i < 32; i++) {
 696            uint64_t vsr[2];
 697
 698            reg.addr = (uintptr_t) &vsr;
 699            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 700
 701            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 702            if (ret < 0) {
 703                DPRINTF("Unable to get %s%d from KVM: %s\n",
 704                        vsx ? "VSR" : "FPR", i, strerror(errno));
 705                return ret;
 706            } else {
 707#ifdef HOST_WORDS_BIGENDIAN
 708                env->fpr[i] = vsr[0];
 709                if (vsx) {
 710                    env->vsr[i] = vsr[1];
 711                }
 712#else
 713                env->fpr[i] = vsr[1];
 714                if (vsx) {
 715                    env->vsr[i] = vsr[0];
 716                }
 717#endif
 718            }
 719        }
 720    }
 721
 722    if (env->insns_flags & PPC_ALTIVEC) {
 723        reg.id = KVM_REG_PPC_VSCR;
 724        reg.addr = (uintptr_t)&env->vscr;
 725        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 726        if (ret < 0) {
 727            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 728            return ret;
 729        }
 730
 731        for (i = 0; i < 32; i++) {
 732            reg.id = KVM_REG_PPC_VR(i);
 733            reg.addr = (uintptr_t)&env->avr[i];
 734            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 735            if (ret < 0) {
 736                DPRINTF("Unable to get VR%d from KVM: %s\n",
 737                        i, strerror(errno));
 738                return ret;
 739            }
 740        }
 741    }
 742
 743    return 0;
 744}
 745
 746#if defined(TARGET_PPC64)
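     /* Fetch the PAPR per-CPU areas (VPA, SLB shadow buffer and dispatch
      * trace log) registration state from KVM into the sPAPR CPU state. */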
 747static int kvm_get_vpa(CPUState *cs)
 748{
 749    PowerPCCPU *cpu = POWERPC_CPU(cs);
 750    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 751    struct kvm_one_reg reg;
 752    int ret;
 753
 754    reg.id = KVM_REG_PPC_VPA_ADDR;
 755    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 756    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 757    if (ret < 0) {
 758        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 759        return ret;
 760    }
 761
 762    assert((uintptr_t)&spapr_cpu->slb_shadow_size
 763           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 764    reg.id = KVM_REG_PPC_VPA_SLB;
 765    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 766    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 767    if (ret < 0) {
 768        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 769                strerror(errno));
 770        return ret;
 771    }
 772
 773    assert((uintptr_t)&spapr_cpu->dtl_size
 774           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 775    reg.id = KVM_REG_PPC_VPA_DTL;
 776    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 777    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 778    if (ret < 0) {
 779        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 780                strerror(errno));
 781        return ret;
 782    }
 783
 784    return 0;
 785}
 786
 787static int kvm_put_vpa(CPUState *cs)
 788{
 789    PowerPCCPU *cpu = POWERPC_CPU(cs);
 790    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 791    struct kvm_one_reg reg;
 792    int ret;
 793
 794    /* SLB shadow or DTL can't be registered unless a master VPA is
 795     * registered.  That means when restoring state, if a VPA *is*
 796     * registered, we need to set that up first.  If not, we need to
 797     * deregister the others before deregistering the master VPA */
 798    assert(spapr_cpu->vpa_addr
 799           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
 800
 801    if (spapr_cpu->vpa_addr) {
 802        reg.id = KVM_REG_PPC_VPA_ADDR;
 803        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 804        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 805        if (ret < 0) {
 806            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 807            return ret;
 808        }
 809    }
 810
 811    assert((uintptr_t)&spapr_cpu->slb_shadow_size
 812           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 813    reg.id = KVM_REG_PPC_VPA_SLB;
 814    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 815    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 816    if (ret < 0) {
 817        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 818        return ret;
 819    }
 820
 821    assert((uintptr_t)&spapr_cpu->dtl_size
 822           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 823    reg.id = KVM_REG_PPC_VPA_DTL;
 824    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 825    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 826    if (ret < 0) {
 827        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 828                strerror(errno));
 829        return ret;
 830    }
 831
 832    if (!spapr_cpu->vpa_addr) {
 833        reg.id = KVM_REG_PPC_VPA_ADDR;
 834        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 835        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 836        if (ret < 0) {
 837            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 838            return ret;
 839        }
 840    }
 841
 842    return 0;
 843}
 844#endif /* TARGET_PPC64 */
 845
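     /* Push the Book3S segment state (SDR1, SLB, segment registers and BATs)
      * to KVM via KVM_SET_SREGS. */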
 846int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 847{
 848    CPUPPCState *env = &cpu->env;
 849    struct kvm_sregs sregs;
 850    int i;
 851
 852    sregs.pvr = env->spr[SPR_PVR];
 853
 854    if (cpu->vhyp) {
 855        PPCVirtualHypervisorClass *vhc =
 856            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 857        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 858    } else {
 859        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 860    }
 861
 862    /* Sync SLB */
 863#ifdef TARGET_PPC64
 864    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 865        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 866        if (env->slb[i].esid & SLB_ESID_V) {
 867            sregs.u.s.ppc64.slb[i].slbe |= i;
 868        }
 869        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 870    }
 871#endif
 872
 873    /* Sync SRs */
 874    for (i = 0; i < 16; i++) {
 875        sregs.u.s.ppc32.sr[i] = env->sr[i];
 876    }
 877
 878    /* Sync BATs */
 879    for (i = 0; i < 8; i++) {
  880        /* Beware. We have to swap the upper and lower 32-bit halves here */
 881        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 882            | env->DBAT[1][i];
 883        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 884            | env->IBAT[1][i];
 885    }
 886
 887    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 888}
 889
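     /* Write the full register state back to KVM.  Depending on 'level' and
      * the available capabilities this covers GPRs, FP/VSX/Altivec state,
      * sregs, HIOR, the ONE_REG-backed SPRs, transactional state and the
      * VPA. */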
 890int kvm_arch_put_registers(CPUState *cs, int level)
 891{
 892    PowerPCCPU *cpu = POWERPC_CPU(cs);
 893    CPUPPCState *env = &cpu->env;
 894    struct kvm_regs regs;
 895    int ret;
 896    int i;
 897
 898    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 899    if (ret < 0) {
 900        return ret;
 901    }
 902
 903    regs.ctr = env->ctr;
 904    regs.lr  = env->lr;
 905    regs.xer = cpu_read_xer(env);
 906    regs.msr = env->msr;
 907    regs.pc = env->nip;
 908
 909    regs.srr0 = env->spr[SPR_SRR0];
 910    regs.srr1 = env->spr[SPR_SRR1];
 911
 912    regs.sprg0 = env->spr[SPR_SPRG0];
 913    regs.sprg1 = env->spr[SPR_SPRG1];
 914    regs.sprg2 = env->spr[SPR_SPRG2];
 915    regs.sprg3 = env->spr[SPR_SPRG3];
 916    regs.sprg4 = env->spr[SPR_SPRG4];
 917    regs.sprg5 = env->spr[SPR_SPRG5];
 918    regs.sprg6 = env->spr[SPR_SPRG6];
 919    regs.sprg7 = env->spr[SPR_SPRG7];
 920
 921    regs.pid = env->spr[SPR_BOOKE_PID];
 922
  923    for (i = 0; i < 32; i++) {
  924        regs.gpr[i] = env->gpr[i];
  925    }
 926    regs.cr = 0;
 927    for (i = 0; i < 8; i++) {
 928        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 929    }
 930
 931    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
  932    if (ret < 0) {
  933        return ret;
  934    }
 935    kvm_put_fp(cs);
 936
 937    if (env->tlb_dirty) {
 938        kvm_sw_tlb_put(cpu);
 939        env->tlb_dirty = false;
 940    }
 941
 942    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 943        ret = kvmppc_put_books_sregs(cpu);
 944        if (ret < 0) {
 945            return ret;
 946        }
 947    }
 948
 949    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 950        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 951    }
 952
 953    if (cap_one_reg) {
 954        int i;
 955
  956        /* We deliberately ignore errors here: for kernels which have
  957         * the ONE_REG calls but don't support the specific
 958         * registers, there's a reasonable chance things will still
 959         * work, at least until we try to migrate. */
 960        for (i = 0; i < 1024; i++) {
 961            uint64_t id = env->spr_cb[i].one_reg_id;
 962
 963            if (id != 0) {
 964                kvm_put_one_spr(cs, id, i);
 965            }
 966        }
 967
 968#ifdef TARGET_PPC64
 969        if (msr_ts) {
 970            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 971                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 972            }
 973            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 974                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 975            }
 976            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 977            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 978            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 979            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 980            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 981            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 982            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 983            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 984            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 985            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 986        }
 987
 988        if (cap_papr) {
 989            if (kvm_put_vpa(cs) < 0) {
 990                DPRINTF("Warning: Unable to set VPA information to KVM\n");
 991            }
 992        }
 993
 994        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 995#endif /* TARGET_PPC64 */
 996    }
 997
 998    return ret;
 999}
1000
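     /* Recompute a BookE exception vector from its IVOR and the IVPR base. */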
1001static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1002{
 1003    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1004}
1005
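     /* Read the BookE special registers from KVM, honouring the feature
      * bits KVM reports in sregs.u.e.features. */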
1006static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1007{
1008    CPUPPCState *env = &cpu->env;
1009    struct kvm_sregs sregs;
1010    int ret;
1011
1012    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1013    if (ret < 0) {
1014        return ret;
1015    }
1016
1017    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1018        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1019        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1020        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1021        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1022        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1023        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1024        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1025        env->spr[SPR_DECR] = sregs.u.e.dec;
1026        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1027        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1028        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1029    }
1030
1031    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1032        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1033        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1034        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1035        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1036        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1037    }
1038
1039    if (sregs.u.e.features & KVM_SREGS_E_64) {
1040        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1041    }
1042
1043    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1044        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1045    }
1046
1047    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1048        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1049        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1050        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1051        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1052        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1053        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1054        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1055        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1056        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1057        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1058        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1059        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1060        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1061        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1062        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1063        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1064        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1065        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1066        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1067        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1068        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1069        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1070        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1071        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1072        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1073        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1074        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1075        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1076        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1077        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1078        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1079        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1080
1081        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1082            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1083            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1084            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1085            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1086            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1087            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1088        }
1089
1090        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1091            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1092            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1093        }
1094
1095        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1096            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1097            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1098            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1099            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1100        }
1101    }
1102
1103    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1104        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1105        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1106        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1107        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1108        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1109        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1110        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1111        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1112        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1113        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1114    }
1115
1116    if (sregs.u.e.features & KVM_SREGS_EXP) {
1117        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1118    }
1119
1120    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1121        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1122        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1123    }
1124
1125    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1126        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1127        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1128        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1129
1130        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1131            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1132            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1133        }
1134    }
1135
1136    return 0;
1137}
1138
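     /* Read the Book3S segment state (SDR1, SLB, segment registers and BATs)
      * back from KVM via KVM_GET_SREGS. */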
1139static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1140{
1141    CPUPPCState *env = &cpu->env;
1142    struct kvm_sregs sregs;
1143    int ret;
1144    int i;
1145
1146    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1147    if (ret < 0) {
1148        return ret;
1149    }
1150
1151    if (!cpu->vhyp) {
1152        ppc_store_sdr1(env, sregs.u.s.sdr1);
1153    }
1154
1155    /* Sync SLB */
1156#ifdef TARGET_PPC64
1157    /*
1158     * The packed SLB array we get from KVM_GET_SREGS only contains
1159     * information about valid entries. So we flush our internal copy
1160     * to get rid of stale ones, then put all valid SLB entries back
1161     * in.
1162     */
1163    memset(env->slb, 0, sizeof(env->slb));
1164    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1165        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1166        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1167        /*
1168         * Only restore valid entries
1169         */
1170        if (rb & SLB_ESID_V) {
1171            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1172        }
1173    }
1174#endif
1175
1176    /* Sync SRs */
1177    for (i = 0; i < 16; i++) {
1178        env->sr[i] = sregs.u.s.ppc32.sr[i];
1179    }
1180
1181    /* Sync BATs */
1182    for (i = 0; i < 8; i++) {
1183        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1184        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1185        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1186        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1187    }
1188
1189    return 0;
1190}
1191
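     /* Counterpart of kvm_arch_put_registers(): refresh QEMU's copy of the
      * register state from KVM. */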
1192int kvm_arch_get_registers(CPUState *cs)
1193{
1194    PowerPCCPU *cpu = POWERPC_CPU(cs);
1195    CPUPPCState *env = &cpu->env;
1196    struct kvm_regs regs;
1197    uint32_t cr;
1198    int i, ret;
1199
1200    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 1201    if (ret < 0) {
 1202        return ret;
 1203    }
1204    cr = regs.cr;
1205    for (i = 7; i >= 0; i--) {
1206        env->crf[i] = cr & 15;
1207        cr >>= 4;
1208    }
1209
1210    env->ctr = regs.ctr;
1211    env->lr = regs.lr;
1212    cpu_write_xer(env, regs.xer);
1213    env->msr = regs.msr;
1214    env->nip = regs.pc;
1215
1216    env->spr[SPR_SRR0] = regs.srr0;
1217    env->spr[SPR_SRR1] = regs.srr1;
1218
1219    env->spr[SPR_SPRG0] = regs.sprg0;
1220    env->spr[SPR_SPRG1] = regs.sprg1;
1221    env->spr[SPR_SPRG2] = regs.sprg2;
1222    env->spr[SPR_SPRG3] = regs.sprg3;
1223    env->spr[SPR_SPRG4] = regs.sprg4;
1224    env->spr[SPR_SPRG5] = regs.sprg5;
1225    env->spr[SPR_SPRG6] = regs.sprg6;
1226    env->spr[SPR_SPRG7] = regs.sprg7;
1227
1228    env->spr[SPR_BOOKE_PID] = regs.pid;
1229
 1230    for (i = 0; i < 32; i++) {
 1231        env->gpr[i] = regs.gpr[i];
 1232    }
1233    kvm_get_fp(cs);
1234
1235    if (cap_booke_sregs) {
1236        ret = kvmppc_get_booke_sregs(cpu);
1237        if (ret < 0) {
1238            return ret;
1239        }
1240    }
1241
1242    if (cap_segstate) {
1243        ret = kvmppc_get_books_sregs(cpu);
1244        if (ret < 0) {
1245            return ret;
1246        }
1247    }
1248
1249    if (cap_hior) {
1250        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1251    }
1252
1253    if (cap_one_reg) {
1254        int i;
1255
 1256        /* We deliberately ignore errors here: for kernels which have
 1257         * the ONE_REG calls but don't support the specific
1258         * registers, there's a reasonable chance things will still
1259         * work, at least until we try to migrate. */
1260        for (i = 0; i < 1024; i++) {
1261            uint64_t id = env->spr_cb[i].one_reg_id;
1262
1263            if (id != 0) {
1264                kvm_get_one_spr(cs, id, i);
1265            }
1266        }
1267
1268#ifdef TARGET_PPC64
1269        if (msr_ts) {
1270            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1271                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1272            }
1273            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1274                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1275            }
1276            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1277            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1278            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1279            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1280            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1281            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1282            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1283            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1284            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1285            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1286        }
1287
1288        if (cap_papr) {
1289            if (kvm_get_vpa(cs) < 0) {
1290                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1291            }
1292        }
1293
1294        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1295#endif
1296    }
1297
1298    return 0;
1299}
1300
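     /* Raise or lower the external interrupt line of a vcpu via the
      * KVM_INTERRUPT ioctl.  This needs both KVM_CAP_PPC_UNSET_IRQ and
      * KVM_CAP_PPC_IRQ_LEVEL; otherwise it is a no-op. */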
1301int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1302{
1303    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1304
1305    if (irq != PPC_INTERRUPT_EXT) {
1306        return 0;
1307    }
1308
1309    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1310        return 0;
1311    }
1312
1313    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1314
1315    return 0;
1316}
1317
1318#if defined(TARGET_PPCEMB)
1319#define PPC_INPUT_INT PPC40x_INPUT_INT
1320#elif defined(TARGET_PPC64)
1321#define PPC_INPUT_INT PPC970_INPUT_INT
1322#else
1323#define PPC_INPUT_INT PPC6xx_INPUT_INT
1324#endif
1325
1326void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1327{
1328    PowerPCCPU *cpu = POWERPC_CPU(cs);
1329    CPUPPCState *env = &cpu->env;
1330    int r;
1331    unsigned irq;
1332
1333    qemu_mutex_lock_iothread();
1334
1335    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1336     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1337    if (!cap_interrupt_level &&
1338        run->ready_for_interrupt_injection &&
1339        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1340        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1341    {
1342        /* For now KVM disregards the 'irq' argument. However, in the
1343         * future KVM could cache it in-kernel to avoid a heavyweight exit
1344         * when reading the UIC.
1345         */
1346        irq = KVM_INTERRUPT_SET;
1347
1348        DPRINTF("injected interrupt %d\n", irq);
1349        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1350        if (r < 0) {
 1351            printf("cpu %d: failed to inject interrupt %x\n", cs->cpu_index, irq);
1352        }
1353
1354        /* Always wake up soon in case the interrupt was level based */
1355        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1356                       (NANOSECONDS_PER_SECOND / 50));
1357    }
1358
1359    /* We don't know if there are more interrupts pending after this. However,
1360     * the guest will return to userspace in the course of handling this one
 1361     * anyway, so we will get a chance to deliver the rest. */
1362
1363    qemu_mutex_unlock_iothread();
1364}
1365
1366MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1367{
1368    return MEMTXATTRS_UNSPECIFIED;
1369}
1370
1371int kvm_arch_process_async_events(CPUState *cs)
1372{
1373    return cs->halted;
1374}
1375
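     /* KVM_EXIT_HLT: put the vcpu to sleep if no hard interrupt is pending
      * and external interrupts are enabled (MSR_EE set). */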
1376static int kvmppc_handle_halt(PowerPCCPU *cpu)
1377{
1378    CPUState *cs = CPU(cpu);
1379    CPUPPCState *env = &cpu->env;
1380
1381    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1382        cs->halted = 1;
1383        cs->exception_index = EXCP_HLT;
1384    }
1385
1386    return 0;
1387}
1388
1389/* map dcr access to existing qemu dcr emulation */
1390static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1391{
 1392    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
 1393        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
 1394    }
1395    return 0;
1396}
1397
1398static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1399{
 1400    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
 1401        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
 1402    }
1403    return 0;
1404}
1405
1406int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1407{
1408    /* Mixed endian case is not handled */
1409    uint32_t sc = debug_inst_opcode;
1410
1411    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1412                            sizeof(sc), 0) ||
1413        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1414        return -EINVAL;
1415    }
1416
1417    return 0;
1418}
1419
1420int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1421{
1422    uint32_t sc;
1423
1424    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1425        sc != debug_inst_opcode ||
1426        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1427                            sizeof(sc), 1)) {
1428        return -EINVAL;
1429    }
1430
1431    return 0;
1432}
1433
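     /* Look up an entry in hw_debug_points[] by address and type; returns
      * its index, or -1 if not found. */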
1434static int find_hw_breakpoint(target_ulong addr, int type)
1435{
1436    int n;
1437
1438    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1439           <= ARRAY_SIZE(hw_debug_points));
1440
1441    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1442        if (hw_debug_points[n].addr == addr &&
1443             hw_debug_points[n].type == type) {
1444            return n;
1445        }
1446    }
1447
1448    return -1;
1449}
1450
1451static int find_hw_watchpoint(target_ulong addr, int *flag)
1452{
1453    int n;
1454
1455    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1456    if (n >= 0) {
1457        *flag = BP_MEM_ACCESS;
1458        return n;
1459    }
1460
1461    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1462    if (n >= 0) {
1463        *flag = BP_MEM_WRITE;
1464        return n;
1465    }
1466
1467    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1468    if (n >= 0) {
1469        *flag = BP_MEM_READ;
1470        return n;
1471    }
1472
1473    return -1;
1474}
1475
1476int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1477                                  target_ulong len, int type)
1478{
1479    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1480        return -ENOBUFS;
1481    }
1482
1483    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1484    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1485
1486    switch (type) {
1487    case GDB_BREAKPOINT_HW:
1488        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1489            return -ENOBUFS;
1490        }
1491
1492        if (find_hw_breakpoint(addr, type) >= 0) {
1493            return -EEXIST;
1494        }
1495
1496        nb_hw_breakpoint++;
1497        break;
1498
1499    case GDB_WATCHPOINT_WRITE:
1500    case GDB_WATCHPOINT_READ:
1501    case GDB_WATCHPOINT_ACCESS:
1502        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1503            return -ENOBUFS;
1504        }
1505
1506        if (find_hw_breakpoint(addr, type) >= 0) {
1507            return -EEXIST;
1508        }
1509
1510        nb_hw_watchpoint++;
1511        break;
1512
1513    default:
1514        return -ENOSYS;
1515    }
1516
1517    return 0;
1518}
1519
1520int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1521                                  target_ulong len, int type)
1522{
1523    int n;
1524
1525    n = find_hw_breakpoint(addr, type);
1526    if (n < 0) {
1527        return -ENOENT;
1528    }
1529
1530    switch (type) {
1531    case GDB_BREAKPOINT_HW:
1532        nb_hw_breakpoint--;
1533        break;
1534
1535    case GDB_WATCHPOINT_WRITE:
1536    case GDB_WATCHPOINT_READ:
1537    case GDB_WATCHPOINT_ACCESS:
1538        nb_hw_watchpoint--;
1539        break;
1540
1541    default:
1542        return -ENOSYS;
1543    }
1544    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1545
1546    return 0;
1547}
1548
1549void kvm_arch_remove_all_hw_breakpoints(void)
1550{
1551    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1552}
1553
1554void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1555{
1556    int n;
1557
1558    /* Software Breakpoint updates */
1559    if (kvm_sw_breakpoints_active(cs)) {
1560        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1561    }
1562
1563    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1564           <= ARRAY_SIZE(hw_debug_points));
1565    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1566
1567    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1568        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1569        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1570        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1571            switch (hw_debug_points[n].type) {
1572            case GDB_BREAKPOINT_HW:
1573                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1574                break;
1575            case GDB_WATCHPOINT_WRITE:
1576                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1577                break;
1578            case GDB_WATCHPOINT_READ:
1579                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1580                break;
1581            case GDB_WATCHPOINT_ACCESS:
1582                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1583                                        KVMPPC_DEBUG_WATCH_READ;
1584                break;
1585            default:
1586                cpu_abort(cs, "Unsupported breakpoint type\n");
1587            }
1588            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1589        }
1590    }
1591}
1592
1593static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1594{
1595    CPUState *cs = CPU(cpu);
1596    CPUPPCState *env = &cpu->env;
1597    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1598    int handle = 0;
1599    int n;
1600    int flag = 0;
1601
1602    if (cs->singlestep_enabled) {
1603        handle = 1;
1604    } else if (arch_info->status) {
1605        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1606            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1607                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1608                if (n >= 0) {
1609                    handle = 1;
1610                }
1611            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1612                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1613                n = find_hw_watchpoint(arch_info->address,  &flag);
1614                if (n >= 0) {
1615                    handle = 1;
1616                    cs->watchpoint_hit = &hw_watchpoint;
1617                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1618                    hw_watchpoint.flags = flag;
1619                }
1620            }
1621        }
1622    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1623        handle = 1;
1624    } else {
 1625        /* QEMU is not able to handle this debug exception, so inject a
 1626         * program exception into the guest;
 1627         * yes, a program exception, NOT a debug exception!
 1628         * When QEMU is using the debug resources, debug exceptions must
 1629         * always be delivered to it. To achieve this we set MSR_DE and
 1630         * also set MSRP_DEP so the guest cannot change MSR_DE.
 1631         * When emulating the debug resources for the guest, we instead
 1632         * want the guest to control MSR_DE (enabling/disabling the debug
 1633         * interrupt as needed).
 1634         * Supporting both configurations at once is not possible, so
 1635         * debug resources cannot be shared between QEMU and the guest
 1636         * on the BookE architecture.
 1637         * In the current design QEMU gets priority over the guest: if
 1638         * QEMU is using the debug resources, the guest cannot use them.
 1639         * For software breakpoints QEMU uses a privileged instruction, so
 1640         * we cannot be here because the guest raised a debug exception;
 1641         * the only possibility is that the guest executed a privileged
 1642         * or illegal instruction, which is why we inject a program
 1643         * interrupt.
 1644         */
1645
1646        cpu_synchronize_state(cs);
1647        /* env->nip is PC, so increment this by 4 to use
 1648         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1649         */
1650        env->nip += 4;
1651        cs->exception_index = POWERPC_EXCP_PROGRAM;
1652        env->error_code = POWERPC_EXCP_INVAL;
1653        ppc_cpu_do_interrupt(cs);
1654    }
1655
1656    return handle;
1657}
1658
1659int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1660{
1661    PowerPCCPU *cpu = POWERPC_CPU(cs);
1662    CPUPPCState *env = &cpu->env;
1663    int ret;
1664
1665    qemu_mutex_lock_iothread();
1666
1667    switch (run->exit_reason) {
1668    case KVM_EXIT_DCR:
1669        if (run->dcr.is_write) {
1670            DPRINTF("handle dcr write\n");
1671            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1672        } else {
1673            DPRINTF("handle dcr read\n");
1674            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1675        }
1676        break;
1677    case KVM_EXIT_HLT:
1678        DPRINTF("handle halt\n");
1679        ret = kvmppc_handle_halt(cpu);
1680        break;
1681#if defined(TARGET_PPC64)
1682    case KVM_EXIT_PAPR_HCALL:
1683        DPRINTF("handle PAPR hypercall\n");
1684        run->papr_hcall.ret = spapr_hypercall(cpu,
1685                                              run->papr_hcall.nr,
1686                                              run->papr_hcall.args);
1687        ret = 0;
1688        break;
1689#endif
1690    case KVM_EXIT_EPR:
1691        DPRINTF("handle epr\n");
1692        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1693        ret = 0;
1694        break;
1695    case KVM_EXIT_WATCHDOG:
1696        DPRINTF("handle watchdog expiry\n");
1697        watchdog_perform_action();
1698        ret = 0;
1699        break;
1700
1701    case KVM_EXIT_DEBUG:
1702        DPRINTF("handle debug exception\n");
1703        if (kvm_handle_debug(cpu, run)) {
1704            ret = EXCP_DEBUG;
1705            break;
1706        }
1707        /* re-enter, this exception was guest-internal */
1708        ret = 0;
1709        break;
1710
1711    default:
1712        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1713        ret = -1;
1714        break;
1715    }
1716
1717    qemu_mutex_unlock_iothread();
1718    return ret;
1719}
1720
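    /* OR the given bits into the guest's BookE Timer Status Register via
     * the KVM one-reg interface. */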
1721int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1722{
1723    CPUState *cs = CPU(cpu);
1724    uint32_t bits = tsr_bits;
1725    struct kvm_one_reg reg = {
1726        .id = KVM_REG_PPC_OR_TSR,
1727        .addr = (uintptr_t) &bits,
1728    };
1729
1730    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1731}
1732
1733int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1734{
1735
1736    CPUState *cs = CPU(cpu);
1737    uint32_t bits = tsr_bits;
1738    struct kvm_one_reg reg = {
1739        .id = KVM_REG_PPC_CLEAR_TSR,
1740        .addr = (uintptr_t) &bits,
1741    };
1742
1743    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1744}
1745
1746int kvmppc_set_tcr(PowerPCCPU *cpu)
1747{
1748    CPUState *cs = CPU(cpu);
1749    CPUPPCState *env = &cpu->env;
1750    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1751
1752    struct kvm_one_reg reg = {
1753        .id = KVM_REG_PPC_TCR,
1754        .addr = (uintptr_t) &tcr,
1755    };
1756
1757    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1758}
1759
1760int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1761{
1762    CPUState *cs = CPU(cpu);
1763    int ret;
1764
1765    if (!kvm_enabled()) {
1766        return -1;
1767    }
1768
1769    if (!cap_ppc_watchdog) {
1770        printf("warning: KVM does not support watchdog\n");
1771        return -1;
1772    }
1773
1774    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1775    if (ret < 0) {
1776        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1777                __func__, strerror(-ret));
1778        return ret;
1779    }
1780
1781    return ret;
1782}
1783
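    /* Copy the first /proc/cpuinfo line that starts with @field into
     * @value (at most @len bytes).  Returns 0 on success, -1 if the field
     * is not found. */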
1784static int read_cpuinfo(const char *field, char *value, int len)
1785{
1786    FILE *f;
1787    int ret = -1;
1788    int field_len = strlen(field);
1789    char line[512];
1790
1791    f = fopen("/proc/cpuinfo", "r");
1792    if (!f) {
1793        return -1;
1794    }
1795
1796    do {
1797        if (!fgets(line, sizeof(line), f)) {
1798            break;
1799        }
1800        if (!strncmp(line, field, field_len)) {
1801            pstrcpy(value, len, line);
1802            ret = 0;
1803            break;
1804        }
1805    } while (*line);
1806
1807    fclose(f);
1808
1809    return ret;
1810}
1811
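    /* Host timebase frequency, parsed from the "timebase" line of
     * /proc/cpuinfo; falls back to NANOSECONDS_PER_SECOND if it cannot be
     * determined. */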
1812uint32_t kvmppc_get_tbfreq(void)
1813{
1814    char line[512];
1815    char *ns;
1816    uint32_t retval = NANOSECONDS_PER_SECOND;
1817
1818    if (read_cpuinfo("timebase", line, sizeof(line))) {
1819        return retval;
1820    }
1821
1822    if (!(ns = strchr(line, ':'))) {
1823        return retval;
1824    }
1825
1826    ns++;
1827
1828    return atoi(ns);
1829}
1830
1831bool kvmppc_get_host_serial(char **value)
1832{
1833    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1834                               NULL);
1835}
1836
1837bool kvmppc_get_host_model(char **value)
1838{
1839    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1840}
1841
1842/* Try to find a device tree node for a CPU with clock-frequency property */
1843static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1844{
1845    struct dirent *dirp;
1846    DIR *dp;
1847
1848    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1849        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1850        return -1;
1851    }
1852
1853    buf[0] = '\0';
1854    while ((dirp = readdir(dp)) != NULL) {
1855        FILE *f;
1856        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1857                 dirp->d_name);
1858        f = fopen(buf, "r");
1859        if (f) {
1860            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1861            fclose(f);
1862            break;
1863        }
1864        buf[0] = '\0';
1865    }
1866    closedir(dp);
1867    if (buf[0] == '\0') {
1868        printf("Unknown host!\n");
1869        return -1;
1870    }
1871
1872    return 0;
1873}
1874
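    /* Read a single big-endian integer property (32-bit or 64-bit) from a
     * host device tree file.  Returns -1 if the file cannot be opened and
     * 0 if the property size is not understood. */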
1875static uint64_t kvmppc_read_int_dt(const char *filename)
1876{
1877    union {
1878        uint32_t v32;
1879        uint64_t v64;
1880    } u;
1881    FILE *f;
1882    int len;
1883
1884    f = fopen(filename, "rb");
1885    if (!f) {
1886        return -1;
1887    }
1888
1889    len = fread(&u, 1, sizeof(u), f);
1890    fclose(f);
1891    switch (len) {
1892    case 4:
1893        /* property is a 32-bit quantity */
1894        return be32_to_cpu(u.v32);
1895    case 8:
1896        return be64_to_cpu(u.v64);
1897    }
1898
1899    return 0;
1900}
1901
1902/* Read a CPU node property from the host device tree that's a single
1903 * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1904 * (can't find or open the property, or doesn't understand the
1905 * format) */
1906static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1907{
1908    char buf[PATH_MAX], *tmp;
1909    uint64_t val;
1910
1911    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1912        return -1;
1913    }
1914
1915    tmp = g_strdup_printf("%s/%s", buf, propname);
1916    val = kvmppc_read_int_dt(tmp);
1917    g_free(tmp);
1918
1919    return val;
1920}
1921
1922uint64_t kvmppc_get_clockfreq(void)
1923{
1924    return kvmppc_read_int_cpu_dt("clock-frequency");
1925}
1926
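    /* Fetch the KVM paravirtualization info (hypercall instructions and
     * feature flags).  Returns 0 on success, 1 if it is unavailable. */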
1927static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1928{
1929    PowerPCCPU *cpu = ppc_env_get_cpu(env);
1930    CPUState *cs = CPU(cpu);
1931
1932    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1933        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1934        return 0;
1935    }
1936
1937    return 1;
1938}
1939
1940int kvmppc_get_hasidle(CPUPPCState *env)
1941{
1942    struct kvm_ppc_pvinfo pvinfo;
1943
1944    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1945        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1946        return 1;
1947    }
1948
1949    return 0;
1950}
1951
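    /* Fill @buf with the hypercall instruction sequence the guest should
     * use.  Returns 0 when KVM supplied one, 1 when the always-failing
     * fallback sequence below was used instead. */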
1952int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1953{
1954    uint32_t *hc = (uint32_t *)buf;
1955    struct kvm_ppc_pvinfo pvinfo;
1956
1957    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1958        memcpy(buf, pvinfo.hcall, buf_len);
1959        return 0;
1960    }
1961
1962    /*
1963     * Fallback to always fail hypercalls regardless of endianness:
1964     *
1965     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1966     *     li r3, -1
1967     *     b .+8       (becomes nop in wrong endian)
1968     *     bswap32(li r3, -1)
1969     */
1970
1971    hc[0] = cpu_to_be32(0x08000048);
1972    hc[1] = cpu_to_be32(0x3860ffff);
1973    hc[2] = cpu_to_be32(0x48000008);
1974    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1975
1976    return 1;
1977}
1978
1979static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1980{
1981    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1982}
1983
1984void kvmppc_enable_logical_ci_hcalls(void)
1985{
1986    /*
1987     * FIXME: it would be nice if we could detect the cases where
1988     * we're using a device which requires the in-kernel
1989     * implementation of these hcalls but the kernel lacks it, and
1990     * produce a warning in that case.
1991     */
1992    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1993    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1994}
1995
1996void kvmppc_enable_set_mode_hcall(void)
1997{
1998    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1999}
2000
2001void kvmppc_enable_clear_ref_mod_hcalls(void)
2002{
2003    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2004    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2005}
2006
2007void kvmppc_set_papr(PowerPCCPU *cpu)
2008{
2009    CPUState *cs = CPU(cpu);
2010    int ret;
2011
2012    if (!kvm_enabled()) {
2013        return;
2014    }
2015
2016    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2017    if (ret) {
2018        error_report("This vCPU type or KVM version does not support PAPR");
2019        exit(1);
2020    }
2021
2022    /* Update the capability flag so we sync the right information
2023     * with kvm */
2024    cap_papr = 1;
2025}
2026
2027int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2028{
2029    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2030}
2031
2032void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2033{
2034    CPUState *cs = CPU(cpu);
2035    int ret;
2036
2037    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2038    if (ret && mpic_proxy) {
2039        error_report("This KVM version does not support EPR");
2040        exit(1);
2041    }
2042}
2043
2044int kvmppc_smt_threads(void)
2045{
2046    return cap_ppc_smt ? cap_ppc_smt : 1;
2047}
2048
2049int kvmppc_set_smt_threads(int smt)
2050{
2051    int ret;
2052
2053    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2054    if (!ret) {
2055        cap_ppc_smt = smt;
2056    }
2057    return ret;
2058}
2059
2060void kvmppc_hint_smt_possible(Error **errp)
2061{
2062    int i;
2063    GString *g;
2064    char *s;
2065
2066    assert(kvm_enabled());
2067    if (cap_ppc_smt_possible) {
2068        g = g_string_new("Available VSMT modes:");
2069        for (i = 63; i >= 0; i--) {
2070            if ((1UL << i) & cap_ppc_smt_possible) {
2071                g_string_append_printf(g, " %lu", (1UL << i));
2072            }
2073        }
2074        s = g_string_free(g, false);
2075        error_append_hint(errp, "%s.\n", s);
2076        g_free(s);
2077    } else {
2078        error_append_hint(errp,
2079                          "This KVM seems to be too old to support VSMT.\n");
2080    }
2081}
2082
2083
2084#ifdef TARGET_PPC64
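    /* Clamp the RMA so that, with the chosen backing page size, it never
     * holds more pages than the hash page table (of 2^hash_shift bytes)
     * has 128-byte PTEGs, i.e. at most
     * 2^(best_page_shift + hash_shift - 7) bytes. */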
2085uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2086{
2087    struct kvm_ppc_smmu_info info;
2088    long rampagesize, best_page_shift;
2089    int i;
2090
2091    /* Find the largest hardware supported page size that's less than
2092     * or equal to the (logical) backing page size of guest RAM */
2093    kvm_get_smmu_info(&info, &error_fatal);
2094    rampagesize = qemu_getrampagesize();
2095    best_page_shift = 0;
2096
2097    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2098        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2099
2100        if (!sps->page_shift) {
2101            continue;
2102        }
2103
2104        if ((sps->page_shift > best_page_shift)
2105            && ((1UL << sps->page_shift) <= rampagesize)) {
2106            best_page_shift = sps->page_shift;
2107        }
2108    }
2109
2110    return MIN(current_size,
2111               1ULL << (best_page_shift + hash_shift - 7));
2112}
2113#endif
2114
2115bool kvmppc_spapr_use_multitce(void)
2116{
2117    return cap_spapr_multitce;
2118}
2119
2120int kvmppc_spapr_enable_inkernel_multitce(void)
2121{
2122    int ret;
2123
2124    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2125                            H_PUT_TCE_INDIRECT, 1);
2126    if (!ret) {
2127        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128                                H_STUFF_TCE, 1);
2129    }
2130
2131    return ret;
2132}
2133
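    /* Create an in-kernel TCE table for @liobn and mmap() it into QEMU.
     * Returns the mapped table with its fd in *pfd, or NULL if KVM cannot
     * accelerate this table and the caller must fall back to a userspace
     * implementation. */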
2134void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2135                              uint64_t bus_offset, uint32_t nb_table,
2136                              int *pfd, bool need_vfio)
2137{
2138    long len;
2139    int fd;
2140    void *table;
2141
2142    /* Must set fd to -1 so we don't try to munmap when called for
2143     * destroying the table, which the upper layers -will- do
2144     */
2145    *pfd = -1;
2146    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2147        return NULL;
2148    }
2149
2150    if (cap_spapr_tce_64) {
2151        struct kvm_create_spapr_tce_64 args = {
2152            .liobn = liobn,
2153            .page_shift = page_shift,
2154            .offset = bus_offset >> page_shift,
2155            .size = nb_table,
2156            .flags = 0
2157        };
2158        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2159        if (fd < 0) {
2160            fprintf(stderr,
2161                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2162                    liobn);
2163            return NULL;
2164        }
2165    } else if (cap_spapr_tce) {
2166        uint64_t window_size = (uint64_t) nb_table << page_shift;
2167        struct kvm_create_spapr_tce args = {
2168            .liobn = liobn,
2169            .window_size = window_size,
2170        };
2171        if ((window_size != args.window_size) || bus_offset) {
2172            return NULL;
2173        }
2174        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2175        if (fd < 0) {
2176            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2177                    liobn);
2178            return NULL;
2179        }
2180    } else {
2181        return NULL;
2182    }
2183
2184    len = nb_table * sizeof(uint64_t);
2185    /* FIXME: round this up to page size */
2186
2187    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2188    if (table == MAP_FAILED) {
2189        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2190                liobn);
2191        close(fd);
2192        return NULL;
2193    }
2194
2195    *pfd = fd;
2196    return table;
2197}
2198
2199int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2200{
2201    long len;
2202
2203    if (fd < 0) {
2204        return -1;
2205    }
2206
2207    len = nb_table * sizeof(uint64_t);
2208    if ((munmap(table, len) < 0) ||
2209        (close(fd) < 0)) {
2210        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2211                strerror(errno));
2212        /* Leak the table */
2213    }
2214
2215    return 0;
2216}
2217
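    /* Ask KVM to allocate or reset the guest hash page table.  Returns the
     * HPT order actually in use, 0 if QEMU should allocate the HPT itself,
     * or a negative errno. */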
2218int kvmppc_reset_htab(int shift_hint)
2219{
2220    uint32_t shift = shift_hint;
2221
2222    if (!kvm_enabled()) {
2223        /* Full emulation, tell caller to allocate htab itself */
2224        return 0;
2225    }
2226    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2227        int ret;
2228        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2229        if (ret == -ENOTTY) {
2230            /* At least some versions of PR KVM advertise the
2231             * capability, but don't implement the ioctl().  Oops.
2232             * Return 0 so that we allocate the htab in qemu, as is
2233             * correct for PR. */
2234            return 0;
2235        } else if (ret < 0) {
2236            return ret;
2237        }
2238        return shift;
2239    }
2240
2241    /* We have a kernel that predates the htab reset calls.  For PR
2242     * KVM, we need to allocate the htab ourselves, for an HV KVM of
2243     * this era, it has allocated a 16MB fixed size hash table already. */
2244    if (kvmppc_is_pr(kvm_state)) {
2245        /* PR - tell caller to allocate htab */
2246        return 0;
2247    } else {
2248        /* HV - assume 16MB kernel allocated htab */
2249        return 24;
2250    }
2251}
2252
2253static inline uint32_t mfpvr(void)
2254{
2255    uint32_t pvr;
2256
2257    asm ("mfpvr %0"
2258         : "=r"(pvr));
2259    return pvr;
2260}
2261
2262static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2263{
2264    if (on) {
2265        *word |= flags;
2266    } else {
2267        *word &= ~flags;
2268    }
2269}
2270
2271static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2272{
2273    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2274    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2275    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2276
2277    /* Now fix up the class with information we can query from the host */
2278    pcc->pvr = mfpvr();
2279
2280    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2281                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2282    alter_insns(&pcc->insns_flags2, PPC2_VSX,
2283                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2284    alter_insns(&pcc->insns_flags2, PPC2_DFP,
2285                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2286
2287    if (dcache_size != -1) {
2288        pcc->l1_dcache_size = dcache_size;
2289    }
2290
2291    if (icache_size != -1) {
2292        pcc->l1_icache_size = icache_size;
2293    }
2294
2295#if defined(TARGET_PPC64)
2296    pcc->radix_page_info = kvm_get_radix_page_info();
2297
2298    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2299        /*
2300         * POWER9 DD1 has some bugs which make it not really ISA 3.00
2301         * compliant.  More importantly, advertising ISA 3.00
2302         * architected mode may prevent guests from activating
2303         * necessary DD1 workarounds.
2304         */
2305        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2306                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2307    }
2308#endif /* defined(TARGET_PPC64) */
2309}
2310
2311bool kvmppc_has_cap_epr(void)
2312{
2313    return cap_epr;
2314}
2315
2316bool kvmppc_has_cap_fixup_hcalls(void)
2317{
2318    return cap_fixup_hcalls;
2319}
2320
2321bool kvmppc_has_cap_htm(void)
2322{
2323    return cap_htm;
2324}
2325
2326bool kvmppc_has_cap_mmu_radix(void)
2327{
2328    return cap_mmu_radix;
2329}
2330
2331bool kvmppc_has_cap_mmu_hash_v3(void)
2332{
2333    return cap_mmu_hash_v3;
2334}
2335
2336static bool kvmppc_power8_host(void)
2337{
2338    bool ret = false;
2339#ifdef TARGET_PPC64
2340    {
2341        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2342        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2343              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2344              (base_pvr == CPU_POWERPC_POWER8_BASE);
2345    }
2346#endif /* TARGET_PPC64 */
2347    return ret;
2348}
2349
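    /* Parse the KVM CPU character bits for the L1D cache vulnerability:
     * returns 2 if H_CPU_BEHAV_L1D_FLUSH_PR is not required, 1 if an L1D
     * flush workaround is available, 0 otherwise. */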
2350static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2351{
2352    bool l1d_thread_priv_req = !kvmppc_power8_host();
2353
2354    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2355        return 2;
2356    } else if ((!l1d_thread_priv_req ||
2357                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2358               (c.character & c.character_mask
2359                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2360        return 1;
2361    }
2362
2363    return 0;
2364}
2365
2366static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2367{
2368    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2369        return 2;
2370    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2371        return 1;
2372    }
2373
2374    return 0;
2375}
2376
2377static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2378{
2379    if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2380        return SPAPR_CAP_FIXED_CCD;
2381    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2382        return SPAPR_CAP_FIXED_IBS;
2383    }
2384
2385    return 0;
2386}
2387
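    /* Query KVM_PPC_GET_CPU_CHAR for the host's speculation-control
     * characteristics and cache the parsed cap values; everything is
     * assumed broken when the ioctl is unavailable. */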
2388static void kvmppc_get_cpu_characteristics(KVMState *s)
2389{
2390    struct kvm_ppc_cpu_char c;
2391    int ret;
2392
2393    /* Assume broken */
2394    cap_ppc_safe_cache = 0;
2395    cap_ppc_safe_bounds_check = 0;
2396    cap_ppc_safe_indirect_branch = 0;
2397
2398    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2399    if (!ret) {
2400        return;
2401    }
2402    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2403    if (ret < 0) {
2404        return;
2405    }
2406
2407    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2408    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2409    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2410}
2411
2412int kvmppc_get_cap_safe_cache(void)
2413{
2414    return cap_ppc_safe_cache;
2415}
2416
2417int kvmppc_get_cap_safe_bounds_check(void)
2418{
2419    return cap_ppc_safe_bounds_check;
2420}
2421
2422int kvmppc_get_cap_safe_indirect_branch(void)
2423{
2424    return cap_ppc_safe_indirect_branch;
2425}
2426
2427bool kvmppc_has_cap_spapr_vfio(void)
2428{
2429    return cap_spapr_vfio;
2430}
2431
2432PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2433{
2434    uint32_t host_pvr = mfpvr();
2435    PowerPCCPUClass *pvr_pcc;
2436
2437    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2438    if (pvr_pcc == NULL) {
2439        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2440    }
2441
2442    return pvr_pcc;
2443}
2444
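    /* Register the "host" CPU type based on the host PVR and point the
     * matching CPU family alias (e.g. "POWER8" on a POWER8NVL host) at it. */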
2445static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2446{
2447    TypeInfo type_info = {
2448        .name = TYPE_HOST_POWERPC_CPU,
2449        .class_init = kvmppc_host_cpu_class_init,
2450    };
2451    MachineClass *mc = MACHINE_GET_CLASS(ms);
2452    PowerPCCPUClass *pvr_pcc;
2453    ObjectClass *oc;
2454    DeviceClass *dc;
2455    int i;
2456
2457    pvr_pcc = kvm_ppc_get_host_cpu_class();
2458    if (pvr_pcc == NULL) {
2459        return -1;
2460    }
2461    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2462    type_register(&type_info);
2463    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2464        /* override TCG default cpu type with 'host' cpu model */
2465        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2466    }
2467
2468    oc = object_class_by_name(type_info.name);
2469    g_assert(oc);
2470
2471    /*
2472     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2473     * we want "POWER8" to be a "family" alias that points to the current
2474     * host CPU type, too)
2475     */
2476    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2477    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2478        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2479            char *suffix;
2480
2481            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2482            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2483            if (suffix) {
2484                *suffix = 0;
2485            }
2486            break;
2487        }
2488    }
2489
2490    return 0;
2491}
2492
2493int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2494{
2495    struct kvm_rtas_token_args args = {
2496        .token = token,
2497    };
2498
2499    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2500        return -ENOENT;
2501    }
2502
2503    strncpy(args.name, function, sizeof(args.name));
2504
2505    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2506}
2507
2508int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2509{
2510    struct kvm_get_htab_fd s = {
2511        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2512        .start_index = index,
2513    };
2514    int ret;
2515
2516    if (!cap_htab_fd) {
2517        error_setg(errp, "KVM version doesn't support %s the HPT",
2518                   write ? "writing" : "reading");
2519        return -ENOTSUP;
2520    }
2521
2522    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2523    if (ret < 0) {
2524        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2525                   write ? "writing" : "reading", write ? "to" : "from",
2526                   strerror(errno));
2527        return -errno;
2528    }
2529
2530    return ret;
2531}
2532
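    /* Stream hash page table chunks from the KVM HTAB fd into the
     * migration stream, stopping after roughly @max_ns nanoseconds if
     * max_ns is non-negative.  Returns 1 once the whole table has been
     * transmitted, 0 if there is more to send, or a negative error. */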
2533int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2534{
2535    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2536    uint8_t buf[bufsize];
2537    ssize_t rc;
2538
2539    do {
2540        rc = read(fd, buf, bufsize);
2541        if (rc < 0) {
2542            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2543                    strerror(errno));
2544            return rc;
2545        } else if (rc) {
2546            uint8_t *buffer = buf;
2547            ssize_t n = rc;
2548            while (n) {
2549                struct kvm_get_htab_header *head =
2550                    (struct kvm_get_htab_header *) buffer;
2551                size_t chunksize = sizeof(*head) +
2552                     HASH_PTE_SIZE_64 * head->n_valid;
2553
2554                qemu_put_be32(f, head->index);
2555                qemu_put_be16(f, head->n_valid);
2556                qemu_put_be16(f, head->n_invalid);
2557                qemu_put_buffer(f, (void *)(head + 1),
2558                                HASH_PTE_SIZE_64 * head->n_valid);
2559
2560                buffer += chunksize;
2561                n -= chunksize;
2562            }
2563        }
2564    } while ((rc != 0)
2565             && ((max_ns < 0)
2566                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2567
2568    return (rc == 0) ? 1 : 0;
2569}
2570
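    /* Write one hash page table chunk from the migration stream back into
     * KVM via the HTAB fd. */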
2571int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2572                           uint16_t n_valid, uint16_t n_invalid)
2573{
2574    struct kvm_get_htab_header *buf;
2575    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2576    ssize_t rc;
2577
2578    buf = alloca(chunksize);
2579    buf->index = index;
2580    buf->n_valid = n_valid;
2581    buf->n_invalid = n_invalid;
2582
2583    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2584
2585    rc = write(fd, buf, chunksize);
2586    if (rc < 0) {
2587        fprintf(stderr, "Error writing KVM hash table: %s\n",
2588                strerror(errno));
2589        return rc;
2590    }
2591    if (rc != chunksize) {
2592        /* We should never get a short write on a single chunk */
2593        fprintf(stderr, "Short write, restoring KVM hash table\n");
2594        return -1;
2595    }
2596    return 0;
2597}
2598
2599bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2600{
2601    return true;
2602}
2603
2604void kvm_arch_init_irq_routing(KVMState *s)
2605{
2606}
2607
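    /* Read @n HPTEs starting at @ptex from KVM into @hptes, zero-filling
     * the entries KVM reports as invalid. */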
2608void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2609{
2610    int fd, rc;
2611    int i;
2612
2613    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2614
2615    i = 0;
2616    while (i < n) {
2617        struct kvm_get_htab_header *hdr;
2618        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2619        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2620
2621        rc = read(fd, buf, sizeof(buf));
2622        if (rc < 0) {
2623            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2624        }
2625
2626        hdr = (struct kvm_get_htab_header *)buf;
2627        while ((i < n) && ((char *)hdr < (buf + rc))) {
2628            int invalid = hdr->n_invalid, valid = hdr->n_valid;
2629
2630            if (hdr->index != (ptex + i)) {
2631                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2632                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2633            }
2634
2635            if (n - i < valid) {
2636                valid = n - i;
2637            }
2638            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2639            i += valid;
2640
2641            if ((n - i) < invalid) {
2642                invalid = n - i;
2643            }
2644            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2645            i += invalid;
2646
2647            hdr = (struct kvm_get_htab_header *)
2648                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2649        }
2650    }
2651
2652    close(fd);
2653}
2654
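    /* Update a single HPTE in the KVM-managed hash page table through the
     * HTAB fd. */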
2655void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2656{
2657    int fd, rc;
2658    struct {
2659        struct kvm_get_htab_header hdr;
2660        uint64_t pte0;
2661        uint64_t pte1;
2662    } buf;
2663
2664    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2665
2666    buf.hdr.n_valid = 1;
2667    buf.hdr.n_invalid = 0;
2668    buf.hdr.index = ptex;
2669    buf.pte0 = cpu_to_be64(pte0);
2670    buf.pte1 = cpu_to_be64(pte1);
2671
2672    rc = write(fd, &buf, sizeof(buf));
2673    if (rc != sizeof(buf)) {
2674        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2675    }
2676    close(fd);
2677}
2678
2679int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2680                             uint64_t address, uint32_t data, PCIDevice *dev)
2681{
2682    return 0;
2683}
2684
2685int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2686                                int vector, PCIDevice *dev)
2687{
2688    return 0;
2689}
2690
2691int kvm_arch_release_virq_post(int virq)
2692{
2693    return 0;
2694}
2695
2696int kvm_arch_msi_data_to_gsi(uint32_t data)
2697{
2698    return data & 0xffff;
2699}
2700
2701int kvmppc_enable_hwrng(void)
2702{
2703    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2704        return -1;
2705    }
2706
2707    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2708}
2709
2710void kvmppc_check_papr_resize_hpt(Error **errp)
2711{
2712    if (!kvm_enabled()) {
2713        return; /* No KVM, we're good */
2714    }
2715
2716    if (cap_resize_hpt) {
2717        return; /* Kernel has explicit support, we're good */
2718    }
2719
2720    /* Otherwise fallback on looking for PR KVM */
2721    if (kvmppc_is_pr(kvm_state)) {
2722        return;
2723    }
2724
2725    error_setg(errp,
2726               "Hash page table resizing not available with this KVM version");
2727}
2728
2729int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2730{
2731    CPUState *cs = CPU(cpu);
2732    struct kvm_ppc_resize_hpt rhpt = {
2733        .flags = flags,
2734        .shift = shift,
2735    };
2736
2737    if (!cap_resize_hpt) {
2738        return -ENOSYS;
2739    }
2740
2741    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2742}
2743
2744int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2745{
2746    CPUState *cs = CPU(cpu);
2747    struct kvm_ppc_resize_hpt rhpt = {
2748        .flags = flags,
2749        .shift = shift,
2750    };
2751
2752    if (!cap_resize_hpt) {
2753        return -ENOSYS;
2754    }
2755
2756    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2757}
2758
2759/*
2760 * This is a helper function to detect a post migration scenario
2761 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2762 * the guest kernel can't handle a PVR value other than the actual host
2763 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2764 *
2765 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2766 * (so, we're HV), return true. The workaround itself is done in
2767 * cpu_post_load.
2768 *
2769 * The order here is important: we'll only check for KVM PR as a
2770 * fallback if the guest kernel can't handle the situation itself.
2771 * We want to avoid querying the running KVM type from QEMU as much
2772 * as possible.
2773 */
2774bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2775{
2776    CPUState *cs = CPU(cpu);
2777
2778    if (!kvm_enabled()) {
2779        return false;
2780    }
2781
2782    if (cap_ppc_pvr_compat) {
2783        return false;
2784    }
2785
2786    return !kvmppc_is_pr(cs->kvm_state);
2787}
2788