qemu/target/ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qapi/error.h"
  26#include "qemu/error-report.h"
  27#include "cpu.h"
  28#include "cpu-models.h"
  29#include "qemu/timer.h"
  30#include "sysemu/sysemu.h"
  31#include "sysemu/hw_accel.h"
  32#include "kvm_ppc.h"
  33#include "sysemu/cpus.h"
  34#include "sysemu/device_tree.h"
  35#include "mmu-hash64.h"
  36
  37#include "hw/sysbus.h"
  38#include "hw/ppc/spapr.h"
  39#include "hw/ppc/spapr_vio.h"
  40#include "hw/ppc/spapr_cpu_core.h"
  41#include "hw/ppc/ppc.h"
  42#include "sysemu/watchdog.h"
  43#include "trace.h"
  44#include "exec/gdbstub.h"
  45#include "exec/memattrs.h"
  46#include "exec/ram_addr.h"
  47#include "sysemu/hostmem.h"
  48#include "qemu/cutils.h"
  49#include "qemu/mmap-alloc.h"
  50#include "elf.h"
  51#include "sysemu/kvm_int.h"
  52
  53//#define DEBUG_KVM
  54
  55#ifdef DEBUG_KVM
  56#define DPRINTF(fmt, ...) \
  57    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  58#else
  59#define DPRINTF(fmt, ...) \
  60    do { } while (0)
  61#endif
  62
  63#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  64
  65const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  66    KVM_CAP_LAST_INFO
  67};
  68
  69static int cap_interrupt_unset = false;
  70static int cap_interrupt_level = false;
  71static int cap_segstate;
  72static int cap_booke_sregs;
  73static int cap_ppc_smt;
  74static int cap_ppc_smt_possible;
  75static int cap_spapr_tce;
  76static int cap_spapr_tce_64;
  77static int cap_spapr_multitce;
  78static int cap_spapr_vfio;
  79static int cap_hior;
  80static int cap_one_reg;
  81static int cap_epr;
  82static int cap_ppc_watchdog;
  83static int cap_papr;
  84static int cap_htab_fd;
  85static int cap_fixup_hcalls;
  86static int cap_htm;             /* Hardware transactional memory support */
  87static int cap_mmu_radix;
  88static int cap_mmu_hash_v3;
  89static int cap_resize_hpt;
  90static int cap_ppc_pvr_compat;
  91static int cap_ppc_safe_cache;
  92static int cap_ppc_safe_bounds_check;
  93static int cap_ppc_safe_indirect_branch;
  94static int cap_ppc_nested_kvm_hv;
  95
  96static uint32_t debug_inst_opcode;
  97
  98/* XXX We have a race condition where we actually have a level triggered
  99 *     interrupt, but the infrastructure can't expose that yet, so the guest
 100 *     takes but ignores it, goes to sleep and never gets notified that there's
 101 *     still an interrupt pending.
 102 *
 103 *     As a quick workaround, let's just wake up again 20 ms after we injected
  104 *     an interrupt. That way we can ensure that we're always reinjecting
 105 *     interrupts in case the guest swallowed them.
 106 */
 107static QEMUTimer *idle_timer;
 108
 109static void kvm_kick_cpu(void *opaque)
 110{
 111    PowerPCCPU *cpu = opaque;
 112
 113    qemu_cpu_kick(CPU(cpu));
 114}
 115
 116/* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 117 * should only be used for fallback tests - generally we should use
 118 * explicit capabilities for the features we want, rather than
 119 * assuming what is/isn't available depending on the KVM variant. */
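     /* Illustrative fallback pattern (taken from kvm_arch_init_vcpu() below),
      * shown only as an example of the intended use:
      *
      *     if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
      *         ... assume KVM-HV specific behaviour ...
      *     }
      */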
 120static bool kvmppc_is_pr(KVMState *ks)
 121{
 122    /* Assume KVM-PR if the GET_PVINFO capability is available */
 123    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 124}
 125
 126static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 127static void kvmppc_get_cpu_characteristics(KVMState *s);
 128
 129int kvm_arch_init(MachineState *ms, KVMState *s)
 130{
 131    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 132    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 133    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 134    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 135    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 136    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 137    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 138    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 139    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
 140    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 141    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 142    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 143    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 144    /* Note: we don't set cap_papr here, because this capability is
  145     * only activated later, by kvmppc_set_papr() */
 146    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 147    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 148    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 149    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 150    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 151    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 152    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 153    kvmppc_get_cpu_characteristics(s);
 154    cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
 155    /*
  156     * Note: setting it to false because there is no such capability
  157     * in KVM at this moment.
  158     *
  159     * TODO: call kvm_vm_check_extension() with the right capability
  160     * after the kernel starts implementing it. */
 161    cap_ppc_pvr_compat = false;
 162
 163    if (!cap_interrupt_level) {
 164        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 165                        "VM to stall at times!\n");
 166    }
 167
 168    kvm_ppc_register_host_cpu_type(ms);
 169
 170    return 0;
 171}
 172
 173int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 174{
 175    return 0;
 176}
 177
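     /* Tell KVM which PVR the guest CPU uses, via KVM_GET_SREGS/KVM_SET_SREGS.
      * BookE CPUs keep the native host PVR, so this is a no-op for them. */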
 178static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 179{
 180    CPUPPCState *cenv = &cpu->env;
 181    CPUState *cs = CPU(cpu);
 182    struct kvm_sregs sregs;
 183    int ret;
 184
 185    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
  186        /* What we're really trying to say is "if we're on BookE, we
  187           use the native PVR for now". This is the only sane way to
  188           check it, though it may mislead users into thinking they
  189           can run BookE guests on Book S. Let's hope nobody tries :) */
 190        return 0;
 191    } else {
 192        if (!cap_segstate) {
 193            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 194            return -ENOSYS;
 195        }
 196    }
 197
 198    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 199    if (ret) {
 200        return ret;
 201    }
 202
 203    sregs.pvr = cenv->spr[SPR_PVR];
 204    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 205}
 206
 207/* Set up a shared TLB array with KVM */
 208static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 209{
 210    CPUPPCState *env = &cpu->env;
 211    CPUState *cs = CPU(cpu);
 212    struct kvm_book3e_206_tlb_params params = {};
 213    struct kvm_config_tlb cfg = {};
 214    unsigned int entries = 0;
 215    int ret, i;
 216
 217    if (!kvm_enabled() ||
 218        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 219        return 0;
 220    }
 221
 222    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 223
 224    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 225        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 226        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 227        entries += params.tlb_sizes[i];
 228    }
 229
 230    assert(entries == env->nb_tlb);
 231    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 232
 233    env->tlb_dirty = true;
 234
 235    cfg.array = (uintptr_t)env->tlb.tlbm;
 236    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 237    cfg.params = (uintptr_t)&params;
 238    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 239
 240    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 241    if (ret < 0) {
 242        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 243                __func__, strerror(-ret));
 244        return ret;
 245    }
 246
 247    env->kvm_sw_tlb = true;
 248    return 0;
 249}
 250
 251
 252#if defined(TARGET_PPC64)
 253static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
 254{
 255    int ret;
 256
 257    assert(kvm_state != NULL);
 258
 259    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 260        error_setg(errp, "KVM doesn't expose the MMU features it supports");
 261        error_append_hint(errp, "Consider switching to a newer KVM\n");
 262        return;
 263    }
 264
 265    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 266    if (ret == 0) {
 267        return;
 268    }
 269
 270    error_setg_errno(errp, -ret,
 271                     "KVM failed to provide the MMU features it supports");
 272}
 273
 274struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 275{
 276    KVMState *s = KVM_STATE(current_machine->accelerator);
 277    struct ppc_radix_page_info *radix_page_info;
 278    struct kvm_ppc_rmmu_info rmmu_info;
 279    int i;
 280
 281    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 282        return NULL;
 283    }
 284    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 285        return NULL;
 286    }
 287    radix_page_info = g_malloc0(sizeof(*radix_page_info));
 288    radix_page_info->count = 0;
 289    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 290        if (rmmu_info.ap_encodings[i]) {
 291            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 292            radix_page_info->count++;
 293        }
 294    }
 295    return radix_page_info;
 296}
 297
 298target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 299                                     bool radix, bool gtse,
 300                                     uint64_t proc_tbl)
 301{
 302    CPUState *cs = CPU(cpu);
 303    int ret;
 304    uint64_t flags = 0;
 305    struct kvm_ppc_mmuv3_cfg cfg = {
 306        .process_table = proc_tbl,
 307    };
 308
 309    if (radix) {
 310        flags |= KVM_PPC_MMUV3_RADIX;
 311    }
 312    if (gtse) {
 313        flags |= KVM_PPC_MMUV3_GTSE;
 314    }
 315    cfg.flags = flags;
 316    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 317    switch (ret) {
 318    case 0:
 319        return H_SUCCESS;
 320    case -EINVAL:
 321        return H_PARAMETER;
 322    case -ENODEV:
 323        return H_NOT_AVAILABLE;
 324    default:
 325        return H_HARDWARE;
 326    }
 327}
 328
 329bool kvmppc_hpt_needs_host_contiguous_pages(void)
 330{
 331    static struct kvm_ppc_smmu_info smmu_info;
 332
 333    if (!kvm_enabled()) {
 334        return false;
 335    }
 336
 337    kvm_get_smmu_info(&smmu_info, &error_fatal);
 338    return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
 339}
 340
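     /* Cross-check the hash MMU geometry advertised by the CPU model (segment
      * sizes, SLB size and page-size encodings) against what this KVM host
      * supports, and report a descriptive error on any mismatch. */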
 341void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
 342{
 343    struct kvm_ppc_smmu_info smmu_info;
 344    int iq, ik, jq, jk;
 345    Error *local_err = NULL;
 346
 347    /* For now, we only have anything to check on hash64 MMUs */
 348    if (!cpu->hash64_opts || !kvm_enabled()) {
 349        return;
 350    }
 351
 352    kvm_get_smmu_info(&smmu_info, &local_err);
 353    if (local_err) {
 354        error_propagate(errp, local_err);
 355        return;
 356    }
 357
 358    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
 359        && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 360        error_setg(errp,
 361                   "KVM does not support 1TiB segments which guest expects");
 362        return;
 363    }
 364
 365    if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
 366        error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
 367                   smmu_info.slb_size, cpu->hash64_opts->slb_size);
 368        return;
 369    }
 370
 371    /*
 372     * Verify that every pagesize supported by the cpu model is
 373     * supported by KVM with the same encodings
 374     */
 375    for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
 376        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
 377        struct kvm_ppc_one_seg_page_size *ksps;
 378
 379        for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
 380            if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
 381                break;
 382            }
 383        }
 384        if (ik >= ARRAY_SIZE(smmu_info.sps)) {
  385            error_setg(errp, "KVM doesn't support base page shift %u",
 386                       qsps->page_shift);
 387            return;
 388        }
 389
 390        ksps = &smmu_info.sps[ik];
 391        if (ksps->slb_enc != qsps->slb_enc) {
 392            error_setg(errp,
 393"KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
 394                       ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
 395            return;
 396        }
 397
 398        for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
 399            for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
 400                if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
 401                    break;
 402                }
 403            }
 404
 405            if (jk >= ARRAY_SIZE(ksps->enc)) {
 406                error_setg(errp, "KVM doesn't support page shift %u/%u",
 407                           qsps->enc[jq].page_shift, qsps->page_shift);
 408                return;
 409            }
 410            if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
 411                error_setg(errp,
 412"KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
 413                           ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
 414                           qsps->page_shift, qsps->enc[jq].pte_enc);
 415                return;
 416            }
 417        }
 418    }
 419
 420    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
  421        /* For the most part, the guest page sizes we can use are
  422         * determined by the host pages used to map guest RAM, which is
  423         * handled in the platform code. Cache-inhibited largepages
  424         * (64k), however, are used for I/O, so if they're mapped to
  425         * the host at all it will be a normal mapping, not a special
  426         * hugepage one used for RAM. */
 427        if (getpagesize() < 0x10000) {
 428            error_setg(errp,
 429                       "KVM can't supply 64kiB CI pages, which guest expects");
 430        }
 431    }
 432}
  433#endif /* defined(TARGET_PPC64) */
 434
 435unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 436{
 437    return POWERPC_CPU(cpu)->vcpu_id;
 438}
 439
  440/* e500 supports 2 h/w breakpoints and 2 watchpoints.
  441 * book3s supports only 1 watchpoint, so an array size
 442 * of 4 is sufficient for now.
 443 */
 444#define MAX_HW_BKPTS 4
 445
 446static struct HWBreakpoint {
 447    target_ulong addr;
 448    int type;
 449} hw_debug_points[MAX_HW_BKPTS];
 450
 451static CPUWatchpoint hw_watchpoint;
 452
  453/* By default, no breakpoints or watchpoints are supported */
 454static int max_hw_breakpoint;
 455static int max_hw_watchpoint;
 456static int nb_hw_breakpoint;
 457static int nb_hw_watchpoint;
 458
 459static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 460{
 461    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 462        max_hw_breakpoint = 2;
 463        max_hw_watchpoint = 2;
 464    }
 465
 466    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 467        fprintf(stderr, "Error initializing h/w breakpoints\n");
 468        return;
 469    }
 470}
 471
 472int kvm_arch_init_vcpu(CPUState *cs)
 473{
 474    PowerPCCPU *cpu = POWERPC_CPU(cs);
 475    CPUPPCState *cenv = &cpu->env;
 476    int ret;
 477
 478    /* Synchronize sregs with kvm */
 479    ret = kvm_arch_sync_sregs(cpu);
 480    if (ret) {
 481        if (ret == -EINVAL) {
 482            error_report("Register sync failed... If you're using kvm-hv.ko,"
 483                         " only \"-cpu host\" is possible");
 484        }
 485        return ret;
 486    }
 487
 488    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 489
 490    switch (cenv->mmu_model) {
 491    case POWERPC_MMU_BOOKE206:
 492        /* This target supports access to KVM's guest TLB */
 493        ret = kvm_booke206_tlb_init(cpu);
 494        break;
 495    case POWERPC_MMU_2_07:
 496        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
  497            /* KVM-HV has transactional memory on POWER8 even without the
  498             * KVM_CAP_PPC_HTM extension, so enable it here instead, as
  499             * long as it's available to userspace on the host. */
 500            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 501                cap_htm = true;
 502            }
 503        }
 504        break;
 505    default:
 506        break;
 507    }
 508
 509    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 510    kvmppc_hw_debug_points_init(cenv);
 511
 512    return ret;
 513}
 514
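     /* Flush QEMU's software TLB state back to KVM by marking every entry of
      * the shared array (set up in kvm_booke206_tlb_init()) as dirty. */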
 515static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 516{
 517    CPUPPCState *env = &cpu->env;
 518    CPUState *cs = CPU(cpu);
 519    struct kvm_dirty_tlb dirty_tlb;
 520    unsigned char *bitmap;
 521    int ret;
 522
 523    if (!env->kvm_sw_tlb) {
 524        return;
 525    }
 526
 527    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 528    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 529
 530    dirty_tlb.bitmap = (uintptr_t)bitmap;
 531    dirty_tlb.num_dirty = env->nb_tlb;
 532
 533    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 534    if (ret) {
 535        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 536                __func__, strerror(-ret));
 537    }
 538
 539    g_free(bitmap);
 540}
 541
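     /* Read a single SPR from KVM into env->spr[spr] via the ONE_REG
      * interface; the register width (32 or 64 bit) is encoded in the
      * ONE_REG id itself. */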
 542static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 543{
 544    PowerPCCPU *cpu = POWERPC_CPU(cs);
 545    CPUPPCState *env = &cpu->env;
 546    union {
 547        uint32_t u32;
 548        uint64_t u64;
 549    } val;
 550    struct kvm_one_reg reg = {
 551        .id = id,
 552        .addr = (uintptr_t) &val,
 553    };
 554    int ret;
 555
 556    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 557    if (ret != 0) {
 558        trace_kvm_failed_spr_get(spr, strerror(errno));
 559    } else {
 560        switch (id & KVM_REG_SIZE_MASK) {
 561        case KVM_REG_SIZE_U32:
 562            env->spr[spr] = val.u32;
 563            break;
 564
 565        case KVM_REG_SIZE_U64:
 566            env->spr[spr] = val.u64;
 567            break;
 568
 569        default:
 570            /* Don't handle this size yet */
 571            abort();
 572        }
 573    }
 574}
 575
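     /* Write env->spr[spr] to KVM via the ONE_REG interface, mirroring
      * kvm_get_one_spr() above. */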
 576static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 577{
 578    PowerPCCPU *cpu = POWERPC_CPU(cs);
 579    CPUPPCState *env = &cpu->env;
 580    union {
 581        uint32_t u32;
 582        uint64_t u64;
 583    } val;
 584    struct kvm_one_reg reg = {
 585        .id = id,
 586        .addr = (uintptr_t) &val,
 587    };
 588    int ret;
 589
 590    switch (id & KVM_REG_SIZE_MASK) {
 591    case KVM_REG_SIZE_U32:
 592        val.u32 = env->spr[spr];
 593        break;
 594
 595    case KVM_REG_SIZE_U64:
 596        val.u64 = env->spr[spr];
 597        break;
 598
 599    default:
 600        /* Don't handle this size yet */
 601        abort();
 602    }
 603
 604    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 605    if (ret != 0) {
 606        trace_kvm_failed_spr_set(spr, strerror(errno));
 607    }
 608}
 609
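     /* Push the floating point, VSX and Altivec state to KVM.  When VSX is
      * available, each FPR is transferred as doubleword 0 of the matching
      * VSR, hence the host-endian-dependent packing of vsr[] below. */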
 610static int kvm_put_fp(CPUState *cs)
 611{
 612    PowerPCCPU *cpu = POWERPC_CPU(cs);
 613    CPUPPCState *env = &cpu->env;
 614    struct kvm_one_reg reg;
 615    int i;
 616    int ret;
 617
 618    if (env->insns_flags & PPC_FLOAT) {
 619        uint64_t fpscr = env->fpscr;
 620        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 621
 622        reg.id = KVM_REG_PPC_FPSCR;
 623        reg.addr = (uintptr_t)&fpscr;
 624        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 625        if (ret < 0) {
 626            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 627            return ret;
 628        }
 629
 630        for (i = 0; i < 32; i++) {
 631            uint64_t vsr[2];
 632
 633#ifdef HOST_WORDS_BIGENDIAN
 634            vsr[0] = float64_val(env->fpr[i]);
 635            vsr[1] = env->vsr[i];
 636#else
 637            vsr[0] = env->vsr[i];
 638            vsr[1] = float64_val(env->fpr[i]);
 639#endif
 640            reg.addr = (uintptr_t) &vsr;
 641            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 642
 643            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 644            if (ret < 0) {
 645                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 646                        i, strerror(errno));
 647                return ret;
 648            }
 649        }
 650    }
 651
 652    if (env->insns_flags & PPC_ALTIVEC) {
 653        reg.id = KVM_REG_PPC_VSCR;
 654        reg.addr = (uintptr_t)&env->vscr;
 655        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 656        if (ret < 0) {
 657            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 658            return ret;
 659        }
 660
 661        for (i = 0; i < 32; i++) {
 662            reg.id = KVM_REG_PPC_VR(i);
 663            reg.addr = (uintptr_t)&env->avr[i];
 664            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 665            if (ret < 0) {
 666                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 667                return ret;
 668            }
 669        }
 670    }
 671
 672    return 0;
 673}
 674
 675static int kvm_get_fp(CPUState *cs)
 676{
 677    PowerPCCPU *cpu = POWERPC_CPU(cs);
 678    CPUPPCState *env = &cpu->env;
 679    struct kvm_one_reg reg;
 680    int i;
 681    int ret;
 682
 683    if (env->insns_flags & PPC_FLOAT) {
 684        uint64_t fpscr;
 685        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 686
 687        reg.id = KVM_REG_PPC_FPSCR;
 688        reg.addr = (uintptr_t)&fpscr;
 689        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 690        if (ret < 0) {
 691            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 692            return ret;
 693        } else {
 694            env->fpscr = fpscr;
 695        }
 696
 697        for (i = 0; i < 32; i++) {
 698            uint64_t vsr[2];
 699
 700            reg.addr = (uintptr_t) &vsr;
 701            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 702
 703            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 704            if (ret < 0) {
 705                DPRINTF("Unable to get %s%d from KVM: %s\n",
 706                        vsx ? "VSR" : "FPR", i, strerror(errno));
 707                return ret;
 708            } else {
 709#ifdef HOST_WORDS_BIGENDIAN
 710                env->fpr[i] = vsr[0];
 711                if (vsx) {
 712                    env->vsr[i] = vsr[1];
 713                }
 714#else
 715                env->fpr[i] = vsr[1];
 716                if (vsx) {
 717                    env->vsr[i] = vsr[0];
 718                }
 719#endif
 720            }
 721        }
 722    }
 723
 724    if (env->insns_flags & PPC_ALTIVEC) {
 725        reg.id = KVM_REG_PPC_VSCR;
 726        reg.addr = (uintptr_t)&env->vscr;
 727        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 728        if (ret < 0) {
 729            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 730            return ret;
 731        }
 732
 733        for (i = 0; i < 32; i++) {
 734            reg.id = KVM_REG_PPC_VR(i);
 735            reg.addr = (uintptr_t)&env->avr[i];
 736            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 737            if (ret < 0) {
 738                DPRINTF("Unable to get VR%d from KVM: %s\n",
 739                        i, strerror(errno));
 740                return ret;
 741            }
 742        }
 743    }
 744
 745    return 0;
 746}
 747
 748#if defined(TARGET_PPC64)
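     /* Fetch the PAPR Virtual Processor Area state (VPA address, SLB shadow
      * buffer and dispatch trace log) from KVM.  The asserts check that each
      * address/size pair is laid out contiguously, since KVM transfers each
      * of these registrations as a single 128-bit ONE_REG value. */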
 749static int kvm_get_vpa(CPUState *cs)
 750{
 751    PowerPCCPU *cpu = POWERPC_CPU(cs);
 752    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 753    struct kvm_one_reg reg;
 754    int ret;
 755
 756    reg.id = KVM_REG_PPC_VPA_ADDR;
 757    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 758    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 759    if (ret < 0) {
 760        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 761        return ret;
 762    }
 763
 764    assert((uintptr_t)&spapr_cpu->slb_shadow_size
 765           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 766    reg.id = KVM_REG_PPC_VPA_SLB;
 767    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 768    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 769    if (ret < 0) {
 770        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 771                strerror(errno));
 772        return ret;
 773    }
 774
 775    assert((uintptr_t)&spapr_cpu->dtl_size
 776           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 777    reg.id = KVM_REG_PPC_VPA_DTL;
 778    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 779    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 780    if (ret < 0) {
 781        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 782                strerror(errno));
 783        return ret;
 784    }
 785
 786    return 0;
 787}
 788
 789static int kvm_put_vpa(CPUState *cs)
 790{
 791    PowerPCCPU *cpu = POWERPC_CPU(cs);
 792    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 793    struct kvm_one_reg reg;
 794    int ret;
 795
 796    /* SLB shadow or DTL can't be registered unless a master VPA is
 797     * registered.  That means when restoring state, if a VPA *is*
 798     * registered, we need to set that up first.  If not, we need to
 799     * deregister the others before deregistering the master VPA */
 800    assert(spapr_cpu->vpa_addr
 801           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
 802
 803    if (spapr_cpu->vpa_addr) {
 804        reg.id = KVM_REG_PPC_VPA_ADDR;
 805        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 806        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 807        if (ret < 0) {
 808            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 809            return ret;
 810        }
 811    }
 812
 813    assert((uintptr_t)&spapr_cpu->slb_shadow_size
 814           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 815    reg.id = KVM_REG_PPC_VPA_SLB;
 816    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 817    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 818    if (ret < 0) {
 819        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 820        return ret;
 821    }
 822
 823    assert((uintptr_t)&spapr_cpu->dtl_size
 824           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 825    reg.id = KVM_REG_PPC_VPA_DTL;
 826    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 827    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 828    if (ret < 0) {
 829        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 830                strerror(errno));
 831        return ret;
 832    }
 833
 834    if (!spapr_cpu->vpa_addr) {
 835        reg.id = KVM_REG_PPC_VPA_ADDR;
 836        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 837        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 838        if (ret < 0) {
 839            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 840            return ret;
 841        }
 842    }
 843
 844    return 0;
 845}
 846#endif /* TARGET_PPC64 */
 847
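     /* Write the Book S segment state (SDR1, SLB entries, segment registers
      * and BATs) to KVM.  Note that QEMU stores each BAT as two 32-bit halves
      * while the KVM sregs layout packs them into a single 64-bit value. */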
 848int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 849{
 850    CPUPPCState *env = &cpu->env;
 851    struct kvm_sregs sregs;
 852    int i;
 853
 854    sregs.pvr = env->spr[SPR_PVR];
 855
 856    if (cpu->vhyp) {
 857        PPCVirtualHypervisorClass *vhc =
 858            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 859        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 860    } else {
 861        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 862    }
 863
 864    /* Sync SLB */
 865#ifdef TARGET_PPC64
 866    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 867        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 868        if (env->slb[i].esid & SLB_ESID_V) {
 869            sregs.u.s.ppc64.slb[i].slbe |= i;
 870        }
 871        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 872    }
 873#endif
 874
 875    /* Sync SRs */
 876    for (i = 0; i < 16; i++) {
 877        sregs.u.s.ppc32.sr[i] = env->sr[i];
 878    }
 879
 880    /* Sync BATs */
 881    for (i = 0; i < 8; i++) {
  882        /* Beware. We have to swap the upper and lower 32-bit halves here */
 883        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 884            | env->DBAT[1][i];
 885        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 886            | env->IBAT[1][i];
 887    }
 888
 889    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 890}
 891
 892int kvm_arch_put_registers(CPUState *cs, int level)
 893{
 894    PowerPCCPU *cpu = POWERPC_CPU(cs);
 895    CPUPPCState *env = &cpu->env;
 896    struct kvm_regs regs;
 897    int ret;
 898    int i;
 899
 900    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 901    if (ret < 0) {
 902        return ret;
 903    }
 904
 905    regs.ctr = env->ctr;
 906    regs.lr  = env->lr;
 907    regs.xer = cpu_read_xer(env);
 908    regs.msr = env->msr;
 909    regs.pc = env->nip;
 910
 911    regs.srr0 = env->spr[SPR_SRR0];
 912    regs.srr1 = env->spr[SPR_SRR1];
 913
 914    regs.sprg0 = env->spr[SPR_SPRG0];
 915    regs.sprg1 = env->spr[SPR_SPRG1];
 916    regs.sprg2 = env->spr[SPR_SPRG2];
 917    regs.sprg3 = env->spr[SPR_SPRG3];
 918    regs.sprg4 = env->spr[SPR_SPRG4];
 919    regs.sprg5 = env->spr[SPR_SPRG5];
 920    regs.sprg6 = env->spr[SPR_SPRG6];
 921    regs.sprg7 = env->spr[SPR_SPRG7];
 922
 923    regs.pid = env->spr[SPR_BOOKE_PID];
 924
  925    for (i = 0; i < 32; i++) {
  926        regs.gpr[i] = env->gpr[i];
  927    }
 928    regs.cr = 0;
 929    for (i = 0; i < 8; i++) {
 930        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 931    }
 932
 933    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
  934    if (ret < 0) {
  935        return ret;
  936    }
 937    kvm_put_fp(cs);
 938
 939    if (env->tlb_dirty) {
 940        kvm_sw_tlb_put(cpu);
 941        env->tlb_dirty = false;
 942    }
 943
 944    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 945        ret = kvmppc_put_books_sregs(cpu);
 946        if (ret < 0) {
 947            return ret;
 948        }
 949    }
 950
 951    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 952        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 953    }
 954
 955    if (cap_one_reg) {
 956        int i;
 957
  958        /* We deliberately ignore errors here: for kernels which have
  959         * the ONE_REG calls but don't support the specific
  960         * registers, there's a reasonable chance things will still
  961         * work, at least until we try to migrate. */
 962        for (i = 0; i < 1024; i++) {
 963            uint64_t id = env->spr_cb[i].one_reg_id;
 964
 965            if (id != 0) {
 966                kvm_put_one_spr(cs, id, i);
 967            }
 968        }
 969
 970#ifdef TARGET_PPC64
 971        if (msr_ts) {
 972            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 973                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 974            }
 975            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 976                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 977            }
 978            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 979            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 980            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 981            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 982            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 983            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 984            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 985            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 986            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 987            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 988        }
 989
 990        if (cap_papr) {
 991            if (kvm_put_vpa(cs) < 0) {
 992                DPRINTF("Warning: Unable to set VPA information to KVM\n");
 993            }
 994        }
 995
 996        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 997#endif /* TARGET_PPC64 */
 998    }
 999
1000    return ret;
1001}
1002
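     /* Recompute a BookE exception vector as the IVPR base plus the given
      * IVOR offset, keeping env->excp_vectors[] consistent with the SPRs
      * synced from KVM in kvmppc_get_booke_sregs() below. */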
1003static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1004{
1005     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1006}
1007
1008static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1009{
1010    CPUPPCState *env = &cpu->env;
1011    struct kvm_sregs sregs;
1012    int ret;
1013
1014    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1015    if (ret < 0) {
1016        return ret;
1017    }
1018
1019    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1020        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1021        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1022        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1023        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1024        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1025        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1026        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1027        env->spr[SPR_DECR] = sregs.u.e.dec;
1028        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1029        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1030        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1031    }
1032
1033    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1034        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1035        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1036        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1037        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1038        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1039    }
1040
1041    if (sregs.u.e.features & KVM_SREGS_E_64) {
1042        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1043    }
1044
1045    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1046        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1047    }
1048
1049    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1050        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1051        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1052        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1053        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1054        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1055        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1056        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1057        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1058        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1059        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1060        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1061        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1062        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1063        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1064        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1065        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1066        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1067        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1068        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1069        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1070        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1071        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1072        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1073        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1074        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1075        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1076        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1077        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1078        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1079        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1080        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1081        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1082
1083        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1084            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1085            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1086            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1087            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1088            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1089            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1090        }
1091
1092        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1093            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1094            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1095        }
1096
1097        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1098            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1099            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1100            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1101            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1102        }
1103    }
1104
1105    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1106        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1107        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1108        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1109        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1110        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1111        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1112        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1113        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1114        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1115        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1116    }
1117
1118    if (sregs.u.e.features & KVM_SREGS_EXP) {
1119        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1120    }
1121
1122    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1123        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1124        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1125    }
1126
1127    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1128        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1129        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1130        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1131
1132        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1133            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1134            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1135        }
1136    }
1137
1138    return 0;
1139}
1140
1141static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1142{
1143    CPUPPCState *env = &cpu->env;
1144    struct kvm_sregs sregs;
1145    int ret;
1146    int i;
1147
1148    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1149    if (ret < 0) {
1150        return ret;
1151    }
1152
1153    if (!cpu->vhyp) {
1154        ppc_store_sdr1(env, sregs.u.s.sdr1);
1155    }
1156
1157    /* Sync SLB */
1158#ifdef TARGET_PPC64
1159    /*
1160     * The packed SLB array we get from KVM_GET_SREGS only contains
1161     * information about valid entries. So we flush our internal copy
1162     * to get rid of stale ones, then put all valid SLB entries back
1163     * in.
1164     */
1165    memset(env->slb, 0, sizeof(env->slb));
1166    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1167        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1168        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1169        /*
1170         * Only restore valid entries
1171         */
1172        if (rb & SLB_ESID_V) {
1173            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1174        }
1175    }
1176#endif
1177
1178    /* Sync SRs */
1179    for (i = 0; i < 16; i++) {
1180        env->sr[i] = sregs.u.s.ppc32.sr[i];
1181    }
1182
1183    /* Sync BATs */
1184    for (i = 0; i < 8; i++) {
1185        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1186        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1187        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1188        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1189    }
1190
1191    return 0;
1192}
1193
1194int kvm_arch_get_registers(CPUState *cs)
1195{
1196    PowerPCCPU *cpu = POWERPC_CPU(cs);
1197    CPUPPCState *env = &cpu->env;
1198    struct kvm_regs regs;
1199    uint32_t cr;
1200    int i, ret;
1201
1202    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 1203    if (ret < 0) {
 1204        return ret;
 1205    }
1206    cr = regs.cr;
1207    for (i = 7; i >= 0; i--) {
1208        env->crf[i] = cr & 15;
1209        cr >>= 4;
1210    }
1211
1212    env->ctr = regs.ctr;
1213    env->lr = regs.lr;
1214    cpu_write_xer(env, regs.xer);
1215    env->msr = regs.msr;
1216    env->nip = regs.pc;
1217
1218    env->spr[SPR_SRR0] = regs.srr0;
1219    env->spr[SPR_SRR1] = regs.srr1;
1220
1221    env->spr[SPR_SPRG0] = regs.sprg0;
1222    env->spr[SPR_SPRG1] = regs.sprg1;
1223    env->spr[SPR_SPRG2] = regs.sprg2;
1224    env->spr[SPR_SPRG3] = regs.sprg3;
1225    env->spr[SPR_SPRG4] = regs.sprg4;
1226    env->spr[SPR_SPRG5] = regs.sprg5;
1227    env->spr[SPR_SPRG6] = regs.sprg6;
1228    env->spr[SPR_SPRG7] = regs.sprg7;
1229
1230    env->spr[SPR_BOOKE_PID] = regs.pid;
1231
 1232    for (i = 0; i < 32; i++) {
 1233        env->gpr[i] = regs.gpr[i];
 1234    }
1235    kvm_get_fp(cs);
1236
1237    if (cap_booke_sregs) {
1238        ret = kvmppc_get_booke_sregs(cpu);
1239        if (ret < 0) {
1240            return ret;
1241        }
1242    }
1243
1244    if (cap_segstate) {
1245        ret = kvmppc_get_books_sregs(cpu);
1246        if (ret < 0) {
1247            return ret;
1248        }
1249    }
1250
1251    if (cap_hior) {
1252        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1253    }
1254
1255    if (cap_one_reg) {
1256        int i;
1257
 1258        /* We deliberately ignore errors here: for kernels which have
 1259         * the ONE_REG calls but don't support the specific
 1260         * registers, there's a reasonable chance things will still
 1261         * work, at least until we try to migrate. */
1262        for (i = 0; i < 1024; i++) {
1263            uint64_t id = env->spr_cb[i].one_reg_id;
1264
1265            if (id != 0) {
1266                kvm_get_one_spr(cs, id, i);
1267            }
1268        }
1269
1270#ifdef TARGET_PPC64
1271        if (msr_ts) {
1272            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1273                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1274            }
1275            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1276                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1277            }
1278            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1279            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1280            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1281            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1282            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1283            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1284            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1285            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1286            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1287            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1288        }
1289
1290        if (cap_papr) {
1291            if (kvm_get_vpa(cs) < 0) {
1292                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1293            }
1294        }
1295
1296        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1297#endif
1298    }
1299
1300    return 0;
1301}
1302
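     /* Raise or lower the external interrupt pin of a vCPU through KVM.
      * Only PPC_INTERRUPT_EXT is forwarded here; all other interrupt inputs
      * are ignored by this hook. */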
1303int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1304{
1305    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1306
1307    if (irq != PPC_INTERRUPT_EXT) {
1308        return 0;
1309    }
1310
1311    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1312        return 0;
1313    }
1314
1315    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1316
1317    return 0;
1318}
1319
1320#if defined(TARGET_PPC64)
1321#define PPC_INPUT_INT PPC970_INPUT_INT
1322#else
1323#define PPC_INPUT_INT PPC6xx_INPUT_INT
1324#endif
1325
1326void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1327{
1328    PowerPCCPU *cpu = POWERPC_CPU(cs);
1329    CPUPPCState *env = &cpu->env;
1330    int r;
1331    unsigned irq;
1332
1333    qemu_mutex_lock_iothread();
1334
1335    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1336     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1337    if (!cap_interrupt_level &&
1338        run->ready_for_interrupt_injection &&
1339        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1340        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1341    {
1342        /* For now KVM disregards the 'irq' argument. However, in the
1343         * future KVM could cache it in-kernel to avoid a heavyweight exit
1344         * when reading the UIC.
1345         */
1346        irq = KVM_INTERRUPT_SET;
1347
1348        DPRINTF("injected interrupt %d\n", irq);
1349        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1350        if (r < 0) {
 1351            printf("cpu %d: failed to inject interrupt %x\n", cs->cpu_index, irq);
1352        }
1353
1354        /* Always wake up soon in case the interrupt was level based */
1355        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1356                       (NANOSECONDS_PER_SECOND / 50));
1357    }
1358
1359    /* We don't know if there are more interrupts pending after this. However,
1360     * the guest will return to userspace in the course of handling this one
 1361     * anyway, so we will get a chance to deliver the rest. */
1362
1363    qemu_mutex_unlock_iothread();
1364}
1365
1366MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1367{
1368    return MEMTXATTRS_UNSPECIFIED;
1369}
1370
1371int kvm_arch_process_async_events(CPUState *cs)
1372{
1373    return cs->halted;
1374}
1375
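     /* Handle a KVM_EXIT_HLT: halt the vCPU only if external interrupts are
      * enabled (MSR_EE) and no hard interrupt is pending; otherwise let the
      * vCPU keep running. */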
1376static int kvmppc_handle_halt(PowerPCCPU *cpu)
1377{
1378    CPUState *cs = CPU(cpu);
1379    CPUPPCState *env = &cpu->env;
1380
1381    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1382        cs->halted = 1;
1383        cs->exception_index = EXCP_HLT;
1384    }
1385
1386    return 0;
1387}
1388
1389/* map dcr access to existing qemu dcr emulation */
1390static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1391{
 1392    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
 1393        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
 1394    }
1395    return 0;
1396}
1397
1398static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1399{
 1400    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
 1401        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
 1402    }
1403    return 0;
1404}
1405
1406int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1407{
1408    /* Mixed endian case is not handled */
1409    uint32_t sc = debug_inst_opcode;
1410
1411    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1412                            sizeof(sc), 0) ||
1413        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1414        return -EINVAL;
1415    }
1416
1417    return 0;
1418}
1419
1420int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1421{
1422    uint32_t sc;
1423
1424    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1425        sc != debug_inst_opcode ||
1426        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1427                            sizeof(sc), 1)) {
1428        return -EINVAL;
1429    }
1430
1431    return 0;
1432}
1433
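     /* Look up a hardware breakpoint or watchpoint by address and GDB type
      * in hw_debug_points[]; returns its index, or -1 if there is no match. */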
1434static int find_hw_breakpoint(target_ulong addr, int type)
1435{
1436    int n;
1437
1438    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1439           <= ARRAY_SIZE(hw_debug_points));
1440
1441    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1442        if (hw_debug_points[n].addr == addr &&
1443             hw_debug_points[n].type == type) {
1444            return n;
1445        }
1446    }
1447
1448    return -1;
1449}
1450
1451static int find_hw_watchpoint(target_ulong addr, int *flag)
1452{
1453    int n;
1454
1455    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1456    if (n >= 0) {
1457        *flag = BP_MEM_ACCESS;
1458        return n;
1459    }
1460
1461    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1462    if (n >= 0) {
1463        *flag = BP_MEM_WRITE;
1464        return n;
1465    }
1466
1467    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1468    if (n >= 0) {
1469        *flag = BP_MEM_READ;
1470        return n;
1471    }
1472
1473    return -1;
1474}
1475
1476int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1477                                  target_ulong len, int type)
1478{
1479    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1480        return -ENOBUFS;
1481    }
1482
1483    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1484    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1485
1486    switch (type) {
1487    case GDB_BREAKPOINT_HW:
1488        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1489            return -ENOBUFS;
1490        }
1491
1492        if (find_hw_breakpoint(addr, type) >= 0) {
1493            return -EEXIST;
1494        }
1495
1496        nb_hw_breakpoint++;
1497        break;
1498
1499    case GDB_WATCHPOINT_WRITE:
1500    case GDB_WATCHPOINT_READ:
1501    case GDB_WATCHPOINT_ACCESS:
1502        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1503            return -ENOBUFS;
1504        }
1505
1506        if (find_hw_breakpoint(addr, type) >= 0) {
1507            return -EEXIST;
1508        }
1509
1510        nb_hw_watchpoint++;
1511        break;
1512
1513    default:
1514        return -ENOSYS;
1515    }
1516
1517    return 0;
1518}
1519
1520int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1521                                  target_ulong len, int type)
1522{
1523    int n;
1524
1525    n = find_hw_breakpoint(addr, type);
1526    if (n < 0) {
1527        return -ENOENT;
1528    }
1529
1530    switch (type) {
1531    case GDB_BREAKPOINT_HW:
1532        nb_hw_breakpoint--;
1533        break;
1534
1535    case GDB_WATCHPOINT_WRITE:
1536    case GDB_WATCHPOINT_READ:
1537    case GDB_WATCHPOINT_ACCESS:
1538        nb_hw_watchpoint--;
1539        break;
1540
1541    default:
1542        return -ENOSYS;
1543    }
1544    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1545
1546    return 0;
1547}
1548
1549void kvm_arch_remove_all_hw_breakpoints(void)
1550{
1551    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1552}
1553
1554void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1555{
1556    int n;
1557
1558    /* Software Breakpoint updates */
1559    if (kvm_sw_breakpoints_active(cs)) {
1560        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1561    }
1562
1563    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1564           <= ARRAY_SIZE(hw_debug_points));
1565    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1566
1567    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1568        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1569        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1570        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1571            switch (hw_debug_points[n].type) {
1572            case GDB_BREAKPOINT_HW:
1573                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1574                break;
1575            case GDB_WATCHPOINT_WRITE:
1576                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1577                break;
1578            case GDB_WATCHPOINT_READ:
1579                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1580                break;
1581            case GDB_WATCHPOINT_ACCESS:
1582                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1583                                        KVMPPC_DEBUG_WATCH_READ;
1584                break;
1585            default:
1586                cpu_abort(cs, "Unsupported breakpoint type\n");
1587            }
1588            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1589        }
1590    }
1591}
1592
1593static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1594{
1595    CPUState *cs = CPU(cpu);
1596    CPUPPCState *env = &cpu->env;
1597    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1598    int handle = 0;
1599    int n;
1600    int flag = 0;
1601
1602    if (cs->singlestep_enabled) {
1603        handle = 1;
1604    } else if (arch_info->status) {
1605        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1606            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1607                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1608                if (n >= 0) {
1609                    handle = 1;
1610                }
1611            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1612                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1613                n = find_hw_watchpoint(arch_info->address,  &flag);
1614                if (n >= 0) {
1615                    handle = 1;
1616                    cs->watchpoint_hit = &hw_watchpoint;
1617                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1618                    hw_watchpoint.flags = flag;
1619                }
1620            }
1621        }
1622    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1623        handle = 1;
1624    } else {
 1625        /* QEMU cannot handle this debug exception, so inject a
 1626         * program exception into the guest instead.
 1627         * Yes, a program exception, NOT a debug exception!
 1628         * When QEMU is using the debug resources, debug exceptions
 1629         * must always be delivered to QEMU. To achieve this we set
 1630         * MSR_DE and also MSRP_DEP so the guest cannot change MSR_DE.
 1631         * When emulating the debug resources for the guest instead,
 1632         * the guest must control MSR_DE (to enable/disable debug
 1633         * interrupts as needed). Supporting both configurations at
 1634         * once is not possible, so debug resources cannot be shared
 1635         * between QEMU and the guest on the BookE architecture.
 1636         * In the current design QEMU takes priority over the guest:
 1637         * if QEMU is using the debug resources, the guest cannot use
 1638         * them.
 1639         * For software breakpoints QEMU uses a privileged instruction,
 1640         * so this exit cannot be a debug exception set up by the guest;
 1641         * the only possibility is that the guest executed a privileged
 1642         * or illegal instruction, which is why we inject a program
 1643         * interrupt.
 1644         */
1645
1646        cpu_synchronize_state(cs);
1647        /* env->nip is PC, so increment this by 4 to use
 1648         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1649         */
1650        env->nip += 4;
1651        cs->exception_index = POWERPC_EXCP_PROGRAM;
1652        env->error_code = POWERPC_EXCP_INVAL;
1653        ppc_cpu_do_interrupt(cs);
1654    }
1655
1656    return handle;
1657}
1658
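/*
 * Top-level KVM exit dispatcher.  Returning 0 re-enters the guest,
 * EXCP_DEBUG hands control to the gdbstub, and a negative value is
 * treated as an error by the generic KVM cpu-exec loop.
 */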
1659int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1660{
1661    PowerPCCPU *cpu = POWERPC_CPU(cs);
1662    CPUPPCState *env = &cpu->env;
1663    int ret;
1664
1665    qemu_mutex_lock_iothread();
1666
1667    switch (run->exit_reason) {
1668    case KVM_EXIT_DCR:
1669        if (run->dcr.is_write) {
1670            DPRINTF("handle dcr write\n");
1671            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1672        } else {
1673            DPRINTF("handle dcr read\n");
1674            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1675        }
1676        break;
1677    case KVM_EXIT_HLT:
1678        DPRINTF("handle halt\n");
1679        ret = kvmppc_handle_halt(cpu);
1680        break;
1681#if defined(TARGET_PPC64)
1682    case KVM_EXIT_PAPR_HCALL:
1683        DPRINTF("handle PAPR hypercall\n");
1684        run->papr_hcall.ret = spapr_hypercall(cpu,
1685                                              run->papr_hcall.nr,
1686                                              run->papr_hcall.args);
1687        ret = 0;
1688        break;
1689#endif
1690    case KVM_EXIT_EPR:
1691        DPRINTF("handle epr\n");
1692        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1693        ret = 0;
1694        break;
1695    case KVM_EXIT_WATCHDOG:
1696        DPRINTF("handle watchdog expiry\n");
1697        watchdog_perform_action();
1698        ret = 0;
1699        break;
1700
1701    case KVM_EXIT_DEBUG:
1702        DPRINTF("handle debug exception\n");
1703        if (kvm_handle_debug(cpu, run)) {
1704            ret = EXCP_DEBUG;
1705            break;
1706        }
1707        /* re-enter, this exception was guest-internal */
1708        ret = 0;
1709        break;
1710
1711    default:
1712        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1713        ret = -1;
1714        break;
1715    }
1716
1717    qemu_mutex_unlock_iothread();
1718    return ret;
1719}
1720
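/*
 * BookE timer helpers: KVM_REG_PPC_OR_TSR ORs the given bits into the
 * guest's Timer Status Register, KVM_REG_PPC_CLEAR_TSR clears them, and
 * KVM_REG_PPC_TCR replaces the Timer Control Register.  All three go
 * through the ONE_REG interface on the vcpu.
 */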
1721int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1722{
1723    CPUState *cs = CPU(cpu);
1724    uint32_t bits = tsr_bits;
1725    struct kvm_one_reg reg = {
1726        .id = KVM_REG_PPC_OR_TSR,
1727        .addr = (uintptr_t) &bits,
1728    };
1729
1730    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1731}
1732
1733int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1734{
1735
1736    CPUState *cs = CPU(cpu);
1737    uint32_t bits = tsr_bits;
1738    struct kvm_one_reg reg = {
1739        .id = KVM_REG_PPC_CLEAR_TSR,
1740        .addr = (uintptr_t) &bits,
1741    };
1742
1743    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1744}
1745
1746int kvmppc_set_tcr(PowerPCCPU *cpu)
1747{
1748    CPUState *cs = CPU(cpu);
1749    CPUPPCState *env = &cpu->env;
1750    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1751
1752    struct kvm_one_reg reg = {
1753        .id = KVM_REG_PPC_TCR,
1754        .addr = (uintptr_t) &tcr,
1755    };
1756
1757    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1758}
1759
1760int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1761{
1762    CPUState *cs = CPU(cpu);
1763    int ret;
1764
1765    if (!kvm_enabled()) {
1766        return -1;
1767    }
1768
1769    if (!cap_ppc_watchdog) {
1770        fprintf(stderr, "warning: KVM does not support watchdog\n");
1771        return -1;
1772    }
1773
1774    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1775    if (ret < 0) {
1776        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1777                __func__, strerror(-ret));
1778        return ret;
1779    }
1780
1781    return ret;
1782}
1783
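/*
 * Copy the first /proc/cpuinfo line starting with @field into @value
 * (truncated to @len bytes).  Returns 0 on success, -1 if the file
 * can't be opened or no matching line is found.
 */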
1784static int read_cpuinfo(const char *field, char *value, int len)
1785{
1786    FILE *f;
1787    int ret = -1;
1788    int field_len = strlen(field);
1789    char line[512];
1790
1791    f = fopen("/proc/cpuinfo", "r");
1792    if (!f) {
1793        return -1;
1794    }
1795
1796    do {
1797        if (!fgets(line, sizeof(line), f)) {
1798            break;
1799        }
1800        if (!strncmp(line, field, field_len)) {
1801            pstrcpy(value, len, line);
1802            ret = 0;
1803            break;
1804        }
1805    } while (*line);
1806
1807    fclose(f);
1808
1809    return ret;
1810}
1811
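/*
 * Parse the host timebase frequency from the "timebase" line in
 * /proc/cpuinfo, which on POWER hosts typically looks like
 * "timebase : 512000000".  Falls back to NANOSECONDS_PER_SECOND if the
 * line can't be found or parsed.
 */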
1812uint32_t kvmppc_get_tbfreq(void)
1813{
1814    char line[512];
1815    char *ns;
1816    uint32_t retval = NANOSECONDS_PER_SECOND;
1817
1818    if (read_cpuinfo("timebase", line, sizeof(line))) {
1819        return retval;
1820    }
1821
1822    if (!(ns = strchr(line, ':'))) {
1823        return retval;
1824    }
1825
1826    ns++;
1827
1828    return atoi(ns);
1829}
1830
1831bool kvmppc_get_host_serial(char **value)
1832{
1833    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1834                               NULL);
1835}
1836
1837bool kvmppc_get_host_model(char **value)
1838{
1839    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1840}
1841
1842/* Try to find a device tree node for a CPU with a clock-frequency property */
1843static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1844{
1845    struct dirent *dirp;
1846    DIR *dp;
1847
1848    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1849        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1850        return -1;
1851    }
1852
1853    buf[0] = '\0';
1854    while ((dirp = readdir(dp)) != NULL) {
1855        FILE *f;
1856        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1857                 dirp->d_name);
1858        f = fopen(buf, "r");
1859        if (f) {
1860            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1861            fclose(f);
1862            break;
1863        }
1864        buf[0] = '\0';
1865    }
1866    closedir(dp);
1867    if (buf[0] == '\0') {
1868        printf("Unknown host!\n");
1869        return -1;
1870    }
1871
1872    return 0;
1873}
1874
1875static uint64_t kvmppc_read_int_dt(const char *filename)
1876{
1877    union {
1878        uint32_t v32;
1879        uint64_t v64;
1880    } u;
1881    FILE *f;
1882    int len;
1883
1884    f = fopen(filename, "rb");
1885    if (!f) {
1886        return -1;
1887    }
1888
1889    len = fread(&u, 1, sizeof(u), f);
1890    fclose(f);
1891    switch (len) {
1892    case 4:
1893        /* property is a 32-bit quantity */
1894        return be32_to_cpu(u.v32);
1895    case 8:
1896        return be64_to_cpu(u.v64);
1897    }
1898
1899    return 0;
1900}
1901
1902/* Read a CPU node property from the host device tree that's a single
1903 * integer (32-bit or 64-bit).  Returns -1 (as a uint64_t) if the CPU
1904 * node or property can't be found or opened, and 0 if the property's
1905 * format isn't understood. */
1906static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1907{
1908    char buf[PATH_MAX], *tmp;
1909    uint64_t val;
1910
1911    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1912        return -1;
1913    }
1914
1915    tmp = g_strdup_printf("%s/%s", buf, propname);
1916    val = kvmppc_read_int_dt(tmp);
1917    g_free(tmp);
1918
1919    return val;
1920}
1921
1922uint64_t kvmppc_get_clockfreq(void)
1923{
1924    return kvmppc_read_int_cpu_dt("clock-frequency");
1925}
1926
1927static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1928{
1929    PowerPCCPU *cpu = ppc_env_get_cpu(env);
1930    CPUState *cs = CPU(cpu);
1931
1932    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1933        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1934        return 0;
1935    }
1936
1937    return 1;
1938}
1939
1940int kvmppc_get_hasidle(CPUPPCState *env)
1941{
1942    struct kvm_ppc_pvinfo pvinfo;
1943
1944    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1945        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1946        return 1;
1947    }
1948
1949    return 0;
1950}
1951
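/*
 * Fill @buf with the guest instruction sequence for making a KVM
 * paravirt hypercall.  Returns 0 if KVM supplied the sequence via
 * KVM_PPC_GET_PVINFO, or 1 if the always-failing fallback sequence
 * below was used instead.
 */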
1952int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1953{
1954    uint32_t *hc = (uint32_t *)buf;
1955    struct kvm_ppc_pvinfo pvinfo;
1956
1957    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1958        memcpy(buf, pvinfo.hcall, buf_len);
1959        return 0;
1960    }
1961
1962    /*
1963     * Fall back to hypercalls that always fail, regardless of endianness:
1964     *
1965     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1966     *     li r3, -1
1967     *     b .+8       (becomes nop in wrong endian)
1968     *     bswap32(li r3, -1)
1969     */
1970
1971    hc[0] = cpu_to_be32(0x08000048);
1972    hc[1] = cpu_to_be32(0x3860ffff);
1973    hc[2] = cpu_to_be32(0x48000008);
1974    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1975
1976    return 1;
1977}
1978
1979static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1980{
1981    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1982}
1983
1984void kvmppc_enable_logical_ci_hcalls(void)
1985{
1986    /*
1987     * FIXME: it would be nice if we could detect the cases where
1988     * we're using a device which requires the in kernel
1989     * implementation of these hcalls but the kernel lacks them, and
1990     * produce a warning in that case.
1991     */
1992    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1993    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1994}
1995
1996void kvmppc_enable_set_mode_hcall(void)
1997{
1998    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1999}
2000
2001void kvmppc_enable_clear_ref_mod_hcalls(void)
2002{
2003    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2004    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2005}
2006
2007void kvmppc_set_papr(PowerPCCPU *cpu)
2008{
2009    CPUState *cs = CPU(cpu);
2010    int ret;
2011
2012    if (!kvm_enabled()) {
2013        return;
2014    }
2015
2016    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2017    if (ret) {
2018        error_report("This vCPU type or KVM version does not support PAPR");
2019        exit(1);
2020    }
2021
2022    /* Update the capability flag so we sync the right information
2023     * with kvm */
2024    cap_papr = 1;
2025}
2026
2027int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2028{
2029    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2030}
2031
2032void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2033{
2034    CPUState *cs = CPU(cpu);
2035    int ret;
2036
2037    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2038    if (ret && mpic_proxy) {
2039        error_report("This KVM version does not support EPR");
2040        exit(1);
2041    }
2042}
2043
2044int kvmppc_smt_threads(void)
2045{
2046    return cap_ppc_smt ? cap_ppc_smt : 1;
2047}
2048
2049int kvmppc_set_smt_threads(int smt)
2050{
2051    int ret;
2052
2053    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2054    if (!ret) {
2055        cap_ppc_smt = smt;
2056    }
2057    return ret;
2058}
2059
2060void kvmppc_hint_smt_possible(Error **errp)
2061{
2062    int i;
2063    GString *g;
2064    char *s;
2065
2066    assert(kvm_enabled());
2067    if (cap_ppc_smt_possible) {
2068        g = g_string_new("Available VSMT modes:");
2069        for (i = 63; i >= 0; i--) {
2070            if ((1UL << i) & cap_ppc_smt_possible) {
2071                g_string_append_printf(g, " %lu", (1UL << i));
2072            }
2073        }
2074        s = g_string_free(g, false);
2075        error_append_hint(errp, "%s.\n", s);
2076        g_free(s);
2077    } else {
2078        error_append_hint(errp,
2079                          "This KVM seems to be too old to support VSMT.\n");
2080    }
2081}
2082
2083
2084#ifdef TARGET_PPC64
2085uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2086{
2087    struct kvm_ppc_smmu_info info;
2088    long rampagesize, best_page_shift;
2089    int i;
2090
2091    /* Find the largest hardware supported page size that's less than
2092     * or equal to the (logical) backing page size of guest RAM */
2093    kvm_get_smmu_info(&info, &error_fatal);
2094    rampagesize = qemu_getrampagesize();
2095    best_page_shift = 0;
2096
2097    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2098        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2099
2100        if (!sps->page_shift) {
2101            continue;
2102        }
2103
2104        if ((sps->page_shift > best_page_shift)
2105            && ((1UL << sps->page_shift) <= rampagesize)) {
2106            best_page_shift = sps->page_shift;
2107        }
2108    }
2109
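    /*
     * Each HPTEG is 128 bytes (2^7), which is presumably where the
     * "- 7" below comes from.  As a worked example, a 64 KiB backing
     * page (best_page_shift = 16) with hash_shift = 24 caps the RMA
     * at 1ULL << (16 + 24 - 7) = 8 GiB.
     */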
2110    return MIN(current_size,
2111               1ULL << (best_page_shift + hash_shift - 7));
2112}
2113#endif
2114
2115bool kvmppc_spapr_use_multitce(void)
2116{
2117    return cap_spapr_multitce;
2118}
2119
2120int kvmppc_spapr_enable_inkernel_multitce(void)
2121{
2122    int ret;
2123
2124    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2125                            H_PUT_TCE_INDIRECT, 1);
2126    if (!ret) {
2127        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128                                H_STUFF_TCE, 1);
2129    }
2130
2131    return ret;
2132}
2133
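/*
 * Create an in-kernel TCE table for @liobn and mmap() it into QEMU.
 * On success the table is returned and *pfd is set to the backing fd;
 * on any failure (or when in-kernel TCE acceleration isn't available)
 * NULL is returned and *pfd stays -1, telling the caller to fall back
 * to a userspace-managed table.
 */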
2134void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2135                              uint64_t bus_offset, uint32_t nb_table,
2136                              int *pfd, bool need_vfio)
2137{
2138    long len;
2139    int fd;
2140    void *table;
2141
2142    /* Must set fd to -1 so we don't try to munmap when called for
2143     * destroying the table, which the upper layers -will- do
2144     */
2145    *pfd = -1;
2146    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2147        return NULL;
2148    }
2149
2150    if (cap_spapr_tce_64) {
2151        struct kvm_create_spapr_tce_64 args = {
2152            .liobn = liobn,
2153            .page_shift = page_shift,
2154            .offset = bus_offset >> page_shift,
2155            .size = nb_table,
2156            .flags = 0
2157        };
2158        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2159        if (fd < 0) {
2160            fprintf(stderr,
2161                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2162                    liobn);
2163            return NULL;
2164        }
2165    } else if (cap_spapr_tce) {
2166        uint64_t window_size = (uint64_t) nb_table << page_shift;
2167        struct kvm_create_spapr_tce args = {
2168            .liobn = liobn,
2169            .window_size = window_size,
2170        };
2171        if ((window_size != args.window_size) || bus_offset) {
2172            return NULL;
2173        }
2174        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2175        if (fd < 0) {
2176            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2177                    liobn);
2178            return NULL;
2179        }
2180    } else {
2181        return NULL;
2182    }
2183
2184    len = nb_table * sizeof(uint64_t);
2185    /* FIXME: round this up to page size */
2186
2187    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2188    if (table == MAP_FAILED) {
2189        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2190                liobn);
2191        close(fd);
2192        return NULL;
2193    }
2194
2195    *pfd = fd;
2196    return table;
2197}
2198
2199int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2200{
2201    long len;
2202
2203    if (fd < 0) {
2204        return -1;
2205    }
2206
2207    len = nb_table * sizeof(uint64_t);
2208    if ((munmap(table, len) < 0) ||
2209        (close(fd) < 0)) {
2210        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2211                strerror(errno));
2212        /* Leak the table */
2213    }
2214
2215    return 0;
2216}
2217
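/*
 * Returns 0 when QEMU should allocate the hash page table itself (TCG
 * or PR KVM), a positive shift when the kernel has allocated one of
 * that size, or a negative errno on failure.
 */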
2218int kvmppc_reset_htab(int shift_hint)
2219{
2220    uint32_t shift = shift_hint;
2221
2222    if (!kvm_enabled()) {
2223        /* Full emulation, tell caller to allocate htab itself */
2224        return 0;
2225    }
2226    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2227        int ret;
2228        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2229        if (ret == -ENOTTY) {
2230            /* At least some versions of PR KVM advertise the
2231             * capability, but don't implement the ioctl().  Oops.
2232             * Return 0 so that we allocate the htab in qemu, as is
2233             * correct for PR. */
2234            return 0;
2235        } else if (ret < 0) {
2236            return ret;
2237        }
2238        return shift;
2239    }
2240
2241    /* We have a kernel that predates the htab reset calls.  For PR
2242     * KVM we need to allocate the htab ourselves; an HV KVM of this
2243     * era will already have allocated a fixed 16MB hash table. */
2244    if (kvmppc_is_pr(kvm_state)) {
2245        /* PR - tell caller to allocate htab */
2246        return 0;
2247    } else {
2248        /* HV - assume 16MB kernel allocated htab */
2249        return 24;
2250    }
2251}
2252
2253static inline uint32_t mfpvr(void)
2254{
2255    uint32_t pvr;
2256
2257    asm ("mfpvr %0"
2258         : "=r"(pvr));
2259    return pvr;
2260}
2261
2262static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2263{
2264    if (on) {
2265        *word |= flags;
2266    } else {
2267        *word &= ~flags;
2268    }
2269}
2270
2271static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2272{
2273    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2274    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2275    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2276
2277    /* Now fix up the class with information we can query from the host */
2278    pcc->pvr = mfpvr();
2279
2280    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2281                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2282    alter_insns(&pcc->insns_flags2, PPC2_VSX,
2283                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2284    alter_insns(&pcc->insns_flags2, PPC2_DFP,
2285                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2286
2287    if (dcache_size != -1) {
2288        pcc->l1_dcache_size = dcache_size;
2289    }
2290
2291    if (icache_size != -1) {
2292        pcc->l1_icache_size = icache_size;
2293    }
2294
2295#if defined(TARGET_PPC64)
2296    pcc->radix_page_info = kvm_get_radix_page_info();
2297
2298    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2299        /*
2300         * POWER9 DD1 has some bugs which make it not really ISA 3.00
2301         * compliant.  More importantly, advertising ISA 3.00
2302         * architected mode may prevent guests from activating
2303         * necessary DD1 workarounds.
2304         */
2305        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2306                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2307    }
2308#endif /* defined(TARGET_PPC64) */
2309}
2310
2311bool kvmppc_has_cap_epr(void)
2312{
2313    return cap_epr;
2314}
2315
2316bool kvmppc_has_cap_fixup_hcalls(void)
2317{
2318    return cap_fixup_hcalls;
2319}
2320
2321bool kvmppc_has_cap_htm(void)
2322{
2323    return cap_htm;
2324}
2325
2326bool kvmppc_has_cap_mmu_radix(void)
2327{
2328    return cap_mmu_radix;
2329}
2330
2331bool kvmppc_has_cap_mmu_hash_v3(void)
2332{
2333    return cap_mmu_hash_v3;
2334}
2335
2336static bool kvmppc_power8_host(void)
2337{
2338    bool ret = false;
2339#ifdef TARGET_PPC64
2340    {
2341        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2342        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2343              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2344              (base_pvr == CPU_POWERPC_POWER8_BASE);
2345    }
2346#endif /* TARGET_PPC64 */
2347    return ret;
2348}
2349
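/*
 * The parse_cap_ppc_safe_* helpers below translate the raw
 * KVM_PPC_GET_CPU_CHAR bits into the 0/1/2 values used by the spapr
 * capability code (roughly: broken / workaround available / fixed);
 * the indirect-branch variant returns the SPAPR_CAP_FIXED_* constants
 * directly.
 */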
2350static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2351{
2352    bool l1d_thread_priv_req = !kvmppc_power8_host();
2353
2354    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2355        return 2;
2356    } else if ((!l1d_thread_priv_req ||
2357                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2358               (c.character & c.character_mask
2359                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2360        return 1;
2361    }
2362
2363    return 0;
2364}
2365
2366static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2367{
2368    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2369        return 2;
2370    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2371        return 1;
2372    }
2373
2374    return 0;
2375}
2376
2377static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2378{
2379    if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2380        return  SPAPR_CAP_FIXED_CCD;
2381    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2382        return SPAPR_CAP_FIXED_IBS;
2383    }
2384
2385    return 0;
2386}
2387
2388static void kvmppc_get_cpu_characteristics(KVMState *s)
2389{
2390    struct kvm_ppc_cpu_char c;
2391    int ret;
2392
2393    /* Assume broken */
2394    cap_ppc_safe_cache = 0;
2395    cap_ppc_safe_bounds_check = 0;
2396    cap_ppc_safe_indirect_branch = 0;
2397
2398    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2399    if (!ret) {
2400        return;
2401    }
2402    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2403    if (ret < 0) {
2404        return;
2405    }
2406
2407    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2408    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2409    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2410}
2411
2412int kvmppc_get_cap_safe_cache(void)
2413{
2414    return cap_ppc_safe_cache;
2415}
2416
2417int kvmppc_get_cap_safe_bounds_check(void)
2418{
2419    return cap_ppc_safe_bounds_check;
2420}
2421
2422int kvmppc_get_cap_safe_indirect_branch(void)
2423{
2424    return cap_ppc_safe_indirect_branch;
2425}
2426
2427bool kvmppc_has_cap_nested_kvm_hv(void)
2428{
2429    return !!cap_ppc_nested_kvm_hv;
2430}
2431
2432int kvmppc_set_cap_nested_kvm_hv(int enable)
2433{
2434    return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2435}
2436
2437bool kvmppc_has_cap_spapr_vfio(void)
2438{
2439    return cap_spapr_vfio;
2440}
2441
2442PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2443{
2444    uint32_t host_pvr = mfpvr();
2445    PowerPCCPUClass *pvr_pcc;
2446
2447    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2448    if (pvr_pcc == NULL) {
2449        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2450    }
2451
2452    return pvr_pcc;
2453}
2454
2455static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2456{
2457    TypeInfo type_info = {
2458        .name = TYPE_HOST_POWERPC_CPU,
2459        .class_init = kvmppc_host_cpu_class_init,
2460    };
2461    MachineClass *mc = MACHINE_GET_CLASS(ms);
2462    PowerPCCPUClass *pvr_pcc;
2463    ObjectClass *oc;
2464    DeviceClass *dc;
2465    int i;
2466
2467    pvr_pcc = kvm_ppc_get_host_cpu_class();
2468    if (pvr_pcc == NULL) {
2469        return -1;
2470    }
2471    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2472    type_register(&type_info);
2473    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2474        /* override TCG default cpu type with 'host' cpu model */
2475        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2476    }
2477
2478    oc = object_class_by_name(type_info.name);
2479    g_assert(oc);
2480
2481    /*
2482     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2483     * we want "POWER8" to be a "family" alias that points to the current
2484     * host CPU type, too)
2485     */
2486    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2487    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2488        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2489            char *suffix;
2490
2491            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2492            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2493            if (suffix) {
2494                *suffix = 0;
2495            }
2496            break;
2497        }
2498    }
2499
2500    return 0;
2501}
2502
2503int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2504{
2505    struct kvm_rtas_token_args args = {
2506        .token = token,
2507    };
2508
2509    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2510        return -ENOENT;
2511    }
2512
2513    strncpy(args.name, function, sizeof(args.name));
2514
2515    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2516}
2517
2518int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2519{
2520    struct kvm_get_htab_fd s = {
2521        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2522        .start_index = index,
2523    };
2524    int ret;
2525
2526    if (!cap_htab_fd) {
2527        error_setg(errp, "KVM version doesn't support %s the HPT",
2528                   write ? "writing" : "reading");
2529        return -ENOTSUP;
2530    }
2531
2532    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2533    if (ret < 0) {
2534        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2535                   write ? "writing" : "reading", write ? "to" : "from",
2536                   strerror(errno));
2537        return -errno;
2538    }
2539
2540    return ret;
2541}
2542
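/*
 * The HTAB fd stream consists of kvm_get_htab_header records (index,
 * n_valid, n_invalid), each followed by n_valid HPTEs of
 * HASH_PTE_SIZE_64 bytes.  kvmppc_save_htab() below forwards those
 * records into the migration stream until the fd reports no more
 * entries or the max_ns time budget is exhausted.
 */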
2543int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2544{
2545    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2546    uint8_t buf[bufsize];
2547    ssize_t rc;
2548
2549    do {
2550        rc = read(fd, buf, bufsize);
2551        if (rc < 0) {
2552            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2553                    strerror(errno));
2554            return rc;
2555        } else if (rc) {
2556            uint8_t *buffer = buf;
2557            ssize_t n = rc;
2558            while (n) {
2559                struct kvm_get_htab_header *head =
2560                    (struct kvm_get_htab_header *) buffer;
2561                size_t chunksize = sizeof(*head) +
2562                     HASH_PTE_SIZE_64 * head->n_valid;
2563
2564                qemu_put_be32(f, head->index);
2565                qemu_put_be16(f, head->n_valid);
2566                qemu_put_be16(f, head->n_invalid);
2567                qemu_put_buffer(f, (void *)(head + 1),
2568                                HASH_PTE_SIZE_64 * head->n_valid);
2569
2570                buffer += chunksize;
2571                n -= chunksize;
2572            }
2573        }
2574    } while ((rc != 0)
2575             && ((max_ns < 0)
2576                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2577
2578    return (rc == 0) ? 1 : 0;
2579}
2580
2581int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2582                           uint16_t n_valid, uint16_t n_invalid)
2583{
2584    struct kvm_get_htab_header *buf;
2585    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2586    ssize_t rc;
2587
2588    buf = alloca(chunksize);
2589    buf->index = index;
2590    buf->n_valid = n_valid;
2591    buf->n_invalid = n_invalid;
2592
2593    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2594
2595    rc = write(fd, buf, chunksize);
2596    if (rc < 0) {
2597        fprintf(stderr, "Error writing KVM hash table: %s\n",
2598                strerror(errno));
2599        return rc;
2600    }
2601    if (rc != chunksize) {
2602        /* We should never get a short write on a single chunk */
2603        fprintf(stderr, "Short write, restoring KVM hash table\n");
2604        return -1;
2605    }
2606    return 0;
2607}
2608
2609bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2610{
2611    return true;
2612}
2613
2614void kvm_arch_init_irq_routing(KVMState *s)
2615{
2616}
2617
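/*
 * Read @n HPTEs starting at @ptex through the HTAB fd, copying valid
 * entries into @hptes and zero-filling the ranges the kernel reports
 * as invalid.
 */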
2618void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2619{
2620    int fd, rc;
2621    int i;
2622
2623    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2624
2625    i = 0;
2626    while (i < n) {
2627        struct kvm_get_htab_header *hdr;
2628        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2629        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2630
2631        rc = read(fd, buf, sizeof(buf));
2632        if (rc < 0) {
2633            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2634        }
2635
2636        hdr = (struct kvm_get_htab_header *)buf;
2637        while ((i < n) && ((char *)hdr < (buf + rc))) {
2638            int invalid = hdr->n_invalid, valid = hdr->n_valid;
2639
2640            if (hdr->index != (ptex + i)) {
2641                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2642                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2643            }
2644
2645            if (n - i < valid) {
2646                valid = n - i;
2647            }
2648            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2649            i += valid;
2650
2651            if ((n - i) < invalid) {
2652                invalid = n - i;
2653            }
2654            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2655            i += invalid;
2656
2657            hdr = (struct kvm_get_htab_header *)
2658                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2659        }
2660    }
2661
2662    close(fd);
2663}
2664
2665void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2666{
2667    int fd, rc;
2668    struct {
2669        struct kvm_get_htab_header hdr;
2670        uint64_t pte0;
2671        uint64_t pte1;
2672    } buf;
2673
2674    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2675
2676    buf.hdr.n_valid = 1;
2677    buf.hdr.n_invalid = 0;
2678    buf.hdr.index = ptex;
2679    buf.pte0 = cpu_to_be64(pte0);
2680    buf.pte1 = cpu_to_be64(pte1);
2681
2682    rc = write(fd, &buf, sizeof(buf));
2683    if (rc != sizeof(buf)) {
2684        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2685    }
2686    close(fd);
2687}
2688
2689int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2690                             uint64_t address, uint32_t data, PCIDevice *dev)
2691{
2692    return 0;
2693}
2694
2695int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2696                                int vector, PCIDevice *dev)
2697{
2698    return 0;
2699}
2700
2701int kvm_arch_release_virq_post(int virq)
2702{
2703    return 0;
2704}
2705
2706int kvm_arch_msi_data_to_gsi(uint32_t data)
2707{
2708    return data & 0xffff;
2709}
2710
2711int kvmppc_enable_hwrng(void)
2712{
2713    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2714        return -1;
2715    }
2716
2717    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2718}
2719
2720void kvmppc_check_papr_resize_hpt(Error **errp)
2721{
2722    if (!kvm_enabled()) {
2723        return; /* No KVM, we're good */
2724    }
2725
2726    if (cap_resize_hpt) {
2727        return; /* Kernel has explicit support, we're good */
2728    }
2729
2730    /* Otherwise fallback on looking for PR KVM */
2731    if (kvmppc_is_pr(kvm_state)) {
2732        return;
2733    }
2734
2735    error_setg(errp,
2736               "Hash page table resizing not available with this KVM version");
2737}
2738
2739int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2740{
2741    CPUState *cs = CPU(cpu);
2742    struct kvm_ppc_resize_hpt rhpt = {
2743        .flags = flags,
2744        .shift = shift,
2745    };
2746
2747    if (!cap_resize_hpt) {
2748        return -ENOSYS;
2749    }
2750
2751    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2752}
2753
2754int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2755{
2756    CPUState *cs = CPU(cpu);
2757    struct kvm_ppc_resize_hpt rhpt = {
2758        .flags = flags,
2759        .shift = shift,
2760    };
2761
2762    if (!cap_resize_hpt) {
2763        return -ENOSYS;
2764    }
2765
2766    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2767}
2768
2769/*
2770 * This is a helper function to detect a post migration scenario
2771 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2772 * the guest kernel can't handle a PVR value other than the actual host
2773 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2774 *
2775 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2776 * (so, we're HV), return true. The workaround itself is done in
2777 * cpu_post_load.
2778 *
2779 * The order here is important: we'll only check for KVM PR as a
2780 * fallback if the guest kernel can't handle the situation itself.
2781 * We want to avoid querying the running KVM type at the QEMU level
2782 * as much as possible.
2783 */
2784bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2785{
2786    CPUState *cs = CPU(cpu);
2787
2788    if (!kvm_enabled()) {
2789        return false;
2790    }
2791
2792    if (cap_ppc_pvr_compat) {
2793        return false;
2794    }
2795
2796    return !kvmppc_is_pr(cs->kvm_state);
2797}
2798
2799void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2800{
2801    CPUState *cs = CPU(cpu);
2802
2803    if (kvm_enabled()) {
2804        kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2805    }
2806}
2807