qemu/target/ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qapi/error.h"
  26#include "qemu/error-report.h"
  27#include "cpu.h"
  28#include "cpu-models.h"
  29#include "qemu/timer.h"
  30#include "sysemu/sysemu.h"
  31#include "sysemu/hw_accel.h"
  32#include "kvm_ppc.h"
  33#include "sysemu/cpus.h"
  34#include "sysemu/device_tree.h"
  35#include "mmu-hash64.h"
  36
  37#include "hw/sysbus.h"
  38#include "hw/ppc/spapr.h"
  39#include "hw/ppc/spapr_cpu_core.h"
  40#include "hw/ppc/ppc.h"
  41#include "sysemu/watchdog.h"
  42#include "trace.h"
  43#include "exec/gdbstub.h"
  44#include "exec/memattrs.h"
  45#include "exec/ram_addr.h"
  46#include "sysemu/hostmem.h"
  47#include "qemu/cutils.h"
  48#include "qemu/mmap-alloc.h"
  49#include "elf.h"
  50#include "sysemu/kvm_int.h"
  51
  52#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  53
  54const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  55    KVM_CAP_LAST_INFO
  56};
  57
  58static int cap_interrupt_unset;
  59static int cap_interrupt_level;
  60static int cap_segstate;
  61static int cap_booke_sregs;
  62static int cap_ppc_smt;
  63static int cap_ppc_smt_possible;
  64static int cap_spapr_tce;
  65static int cap_spapr_tce_64;
  66static int cap_spapr_multitce;
  67static int cap_spapr_vfio;
  68static int cap_hior;
  69static int cap_one_reg;
  70static int cap_epr;
  71static int cap_ppc_watchdog;
  72static int cap_papr;
  73static int cap_htab_fd;
  74static int cap_fixup_hcalls;
  75static int cap_htm;             /* Hardware transactional memory support */
  76static int cap_mmu_radix;
  77static int cap_mmu_hash_v3;
  78static int cap_xive;
  79static int cap_resize_hpt;
  80static int cap_ppc_pvr_compat;
  81static int cap_ppc_safe_cache;
  82static int cap_ppc_safe_bounds_check;
  83static int cap_ppc_safe_indirect_branch;
  84static int cap_ppc_count_cache_flush_assist;
  85static int cap_ppc_nested_kvm_hv;
  86static int cap_large_decr;
  87
  88static uint32_t debug_inst_opcode;
  89
/*
 * XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
 100static QEMUTimer *idle_timer;
 101
 102static void kvm_kick_cpu(void *opaque)
 103{
 104    PowerPCCPU *cpu = opaque;
 105
 106    qemu_cpu_kick(CPU(cpu));
 107}
 108
 109/*
 110 * Check whether we are running with KVM-PR (instead of KVM-HV).  This
 111 * should only be used for fallback tests - generally we should use
 112 * explicit capabilities for the features we want, rather than
 113 * assuming what is/isn't available depending on the KVM variant.
 114 */
 115static bool kvmppc_is_pr(KVMState *ks)
 116{
 117    /* Assume KVM-PR if the GET_PVINFO capability is available */
 118    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 119}
 120
 121static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 122static void kvmppc_get_cpu_characteristics(KVMState *s);
 123static int kvmppc_get_dec_bits(void);
 124
 125int kvm_arch_init(MachineState *ms, KVMState *s)
 126{
 127    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 128    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 129    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 130    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 131    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 132    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 133    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 134    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 135    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
 136    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 137    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 138    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 139    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /*
     * Note: we don't set cap_papr here, because this capability is
     * only activated later, by kvmppc_set_papr()
     */
 144    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 145    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 146    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 147    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 148    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 149    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 150    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
 151    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 152    kvmppc_get_cpu_characteristics(s);
 153    cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
 154    cap_large_decr = kvmppc_get_dec_bits();
    /*
     * Note: setting it to false because there is no such capability
     * in KVM at the moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.
     */
 162    cap_ppc_pvr_compat = false;
 163
 164    if (!cap_interrupt_level) {
 165        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 166                        "VM to stall at times!\n");
 167    }
 168
 169    kvm_ppc_register_host_cpu_type(ms);
 170
 171    return 0;
 172}
 173
 174int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 175{
 176    return 0;
 177}
 178
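/*
 * Tell KVM which PVR the guest uses: read the current sregs so the other
 * fields are preserved, update the PVR and write them back.  BookE guests
 * keep the host's native PVR, so nothing needs to be done for them here.
 */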
 179static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 180{
 181    CPUPPCState *cenv = &cpu->env;
 182    CPUState *cs = CPU(cpu);
 183    struct kvm_sregs sregs;
 184    int ret;
 185
 186    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /*
         * What we're really trying to say is "if we're on BookE, we
         * use the native PVR for now". This is the only sane way to
         * check it though, so we potentially confuse users into thinking
         * they can run BookE guests on BookS. Let's hope nobody dares
         * enough :)
         */
 194        return 0;
 195    } else {
 196        if (!cap_segstate) {
 197            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 198            return -ENOSYS;
 199        }
 200    }
 201
 202    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 203    if (ret) {
 204        return ret;
 205    }
 206
 207    sregs.pvr = cenv->spr[SPR_PVR];
 208    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 209}
 210
 211/* Set up a shared TLB array with KVM */
 212static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 213{
 214    CPUPPCState *env = &cpu->env;
 215    CPUState *cs = CPU(cpu);
 216    struct kvm_book3e_206_tlb_params params = {};
 217    struct kvm_config_tlb cfg = {};
 218    unsigned int entries = 0;
 219    int ret, i;
 220
 221    if (!kvm_enabled() ||
 222        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 223        return 0;
 224    }
 225
 226    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 227
 228    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 229        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 230        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 231        entries += params.tlb_sizes[i];
 232    }
 233
 234    assert(entries == env->nb_tlb);
 235    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 236
 237    env->tlb_dirty = true;
 238
 239    cfg.array = (uintptr_t)env->tlb.tlbm;
 240    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 241    cfg.params = (uintptr_t)&params;
 242    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 243
 244    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 245    if (ret < 0) {
 246        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 247                __func__, strerror(-ret));
 248        return ret;
 249    }
 250
 251    env->kvm_sw_tlb = true;
 252    return 0;
 253}
 254
 255
 256#if defined(TARGET_PPC64)
 257static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
 258{
 259    int ret;
 260
 261    assert(kvm_state != NULL);
 262
 263    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 264        error_setg(errp, "KVM doesn't expose the MMU features it supports");
 265        error_append_hint(errp, "Consider switching to a newer KVM\n");
 266        return;
 267    }
 268
 269    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 270    if (ret == 0) {
 271        return;
 272    }
 273
 274    error_setg_errno(errp, -ret,
 275                     "KVM failed to provide the MMU features it supports");
 276}
 277
 278struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 279{
 280    KVMState *s = KVM_STATE(current_machine->accelerator);
 281    struct ppc_radix_page_info *radix_page_info;
 282    struct kvm_ppc_rmmu_info rmmu_info;
 283    int i;
 284
 285    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 286        return NULL;
 287    }
 288    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 289        return NULL;
 290    }
 291    radix_page_info = g_malloc0(sizeof(*radix_page_info));
 292    radix_page_info->count = 0;
 293    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 294        if (rmmu_info.ap_encodings[i]) {
 295            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 296            radix_page_info->count++;
 297        }
 298    }
 299    return radix_page_info;
 300}
 301
 302target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 303                                     bool radix, bool gtse,
 304                                     uint64_t proc_tbl)
 305{
 306    CPUState *cs = CPU(cpu);
 307    int ret;
 308    uint64_t flags = 0;
 309    struct kvm_ppc_mmuv3_cfg cfg = {
 310        .process_table = proc_tbl,
 311    };
 312
 313    if (radix) {
 314        flags |= KVM_PPC_MMUV3_RADIX;
 315    }
 316    if (gtse) {
 317        flags |= KVM_PPC_MMUV3_GTSE;
 318    }
 319    cfg.flags = flags;
 320    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 321    switch (ret) {
 322    case 0:
 323        return H_SUCCESS;
 324    case -EINVAL:
 325        return H_PARAMETER;
 326    case -ENODEV:
 327        return H_NOT_AVAILABLE;
 328    default:
 329        return H_HARDWARE;
 330    }
 331}
 332
 333bool kvmppc_hpt_needs_host_contiguous_pages(void)
 334{
 335    static struct kvm_ppc_smmu_info smmu_info;
 336
 337    if (!kvm_enabled()) {
 338        return false;
 339    }
 340
 341    kvm_get_smmu_info(&smmu_info, &error_fatal);
 342    return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
 343}
 344
 345void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
 346{
 347    struct kvm_ppc_smmu_info smmu_info;
 348    int iq, ik, jq, jk;
 349    Error *local_err = NULL;
 350
 351    /* For now, we only have anything to check on hash64 MMUs */
 352    if (!cpu->hash64_opts || !kvm_enabled()) {
 353        return;
 354    }
 355
 356    kvm_get_smmu_info(&smmu_info, &local_err);
 357    if (local_err) {
 358        error_propagate(errp, local_err);
 359        return;
 360    }
 361
 362    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
 363        && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 364        error_setg(errp,
 365                   "KVM does not support 1TiB segments which guest expects");
 366        return;
 367    }
 368
 369    if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
 370        error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
 371                   smmu_info.slb_size, cpu->hash64_opts->slb_size);
 372        return;
 373    }
 374
 375    /*
 376     * Verify that every pagesize supported by the cpu model is
 377     * supported by KVM with the same encodings
 378     */
 379    for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
 380        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
 381        struct kvm_ppc_one_seg_page_size *ksps;
 382
 383        for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
 384            if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
 385                break;
 386            }
 387        }
        if (ik >= ARRAY_SIZE(smmu_info.sps)) {
            error_setg(errp, "KVM doesn't support base page shift %u",
                       qsps->page_shift);
            return;
        }
 393
 394        ksps = &smmu_info.sps[ik];
 395        if (ksps->slb_enc != qsps->slb_enc) {
 396            error_setg(errp,
 397"KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
 398                       ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
 399            return;
 400        }
 401
 402        for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
 403            for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
 404                if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
 405                    break;
 406                }
 407            }
 408
 409            if (jk >= ARRAY_SIZE(ksps->enc)) {
 410                error_setg(errp, "KVM doesn't support page shift %u/%u",
 411                           qsps->enc[jq].page_shift, qsps->page_shift);
 412                return;
 413            }
 414            if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
 415                error_setg(errp,
 416"KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
 417                           ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
 418                           qsps->page_shift, qsps->enc[jq].pte_enc);
 419                return;
 420            }
 421        }
 422    }
 423
 424    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
        /*
         * Most of the guest pagesizes we can use are determined by the
         * host pages used to map guest RAM, which is handled in the
         * platform code. Cache-Inhibited largepages (64k), however, are
         * used for I/O, so if they're mapped to the host at all it
         * will be a normal mapping, not a special hugepage one used
         * for RAM.
         */
 433        if (getpagesize() < 0x10000) {
 434            error_setg(errp,
 435                       "KVM can't supply 64kiB CI pages, which guest expects");
 436        }
 437    }
 438}
#endif /* defined(TARGET_PPC64) */
 440
 441unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 442{
 443    return POWERPC_CPU(cpu)->vcpu_id;
 444}
 445
/*
 * e500 supports 2 h/w breakpoints and 2 watchpoints.  book3s supports
 * only 1 watchpoint, so an array size of 4 is sufficient for now.
 */
 450#define MAX_HW_BKPTS 4
 451
 452static struct HWBreakpoint {
 453    target_ulong addr;
 454    int type;
 455} hw_debug_points[MAX_HW_BKPTS];
 456
 457static CPUWatchpoint hw_watchpoint;
 458
/* By default, no h/w breakpoints or watchpoints are supported */
 460static int max_hw_breakpoint;
 461static int max_hw_watchpoint;
 462static int nb_hw_breakpoint;
 463static int nb_hw_watchpoint;
 464
 465static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 466{
 467    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 468        max_hw_breakpoint = 2;
 469        max_hw_watchpoint = 2;
 470    }
 471
 472    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 473        fprintf(stderr, "Error initializing h/w breakpoints\n");
 474        return;
 475    }
 476}
 477
 478int kvm_arch_init_vcpu(CPUState *cs)
 479{
 480    PowerPCCPU *cpu = POWERPC_CPU(cs);
 481    CPUPPCState *cenv = &cpu->env;
 482    int ret;
 483
 484    /* Synchronize sregs with kvm */
 485    ret = kvm_arch_sync_sregs(cpu);
 486    if (ret) {
 487        if (ret == -EINVAL) {
 488            error_report("Register sync failed... If you're using kvm-hv.ko,"
 489                         " only \"-cpu host\" is possible");
 490        }
 491        return ret;
 492    }
 493
 494    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 495
 496    switch (cenv->mmu_model) {
 497    case POWERPC_MMU_BOOKE206:
 498        /* This target supports access to KVM's guest TLB */
 499        ret = kvm_booke206_tlb_init(cpu);
 500        break;
 501    case POWERPC_MMU_2_07:
 502        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /*
             * KVM-HV has transactional memory on POWER8 even without
             * the KVM_CAP_PPC_HTM extension, so enable it here
             * instead, as long as it's available to userspace on the
             * host.
             */
 509            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 510                cap_htm = true;
 511            }
 512        }
 513        break;
 514    default:
 515        break;
 516    }
 517
 518    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 519    kvmppc_hw_debug_points_init(cenv);
 520
 521    return ret;
 522}
 523
 524int kvm_arch_destroy_vcpu(CPUState *cs)
 525{
 526    return 0;
 527}
 528
 529static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 530{
 531    CPUPPCState *env = &cpu->env;
 532    CPUState *cs = CPU(cpu);
 533    struct kvm_dirty_tlb dirty_tlb;
 534    unsigned char *bitmap;
 535    int ret;
 536
 537    if (!env->kvm_sw_tlb) {
 538        return;
 539    }
 540
 541    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 542    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 543
 544    dirty_tlb.bitmap = (uintptr_t)bitmap;
 545    dirty_tlb.num_dirty = env->nb_tlb;
 546
 547    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 548    if (ret) {
 549        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 550                __func__, strerror(-ret));
 551    }
 552
 553    g_free(bitmap);
 554}
 555
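/*
 * Read a single SPR from KVM via KVM_GET_ONE_REG; the transfer size is
 * taken from the register id.  A failure is only traced, not fatal.
 */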
 556static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 557{
 558    PowerPCCPU *cpu = POWERPC_CPU(cs);
 559    CPUPPCState *env = &cpu->env;
 560    union {
 561        uint32_t u32;
 562        uint64_t u64;
 563    } val;
 564    struct kvm_one_reg reg = {
 565        .id = id,
 566        .addr = (uintptr_t) &val,
 567    };
 568    int ret;
 569
 570    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 571    if (ret != 0) {
 572        trace_kvm_failed_spr_get(spr, strerror(errno));
 573    } else {
 574        switch (id & KVM_REG_SIZE_MASK) {
 575        case KVM_REG_SIZE_U32:
 576            env->spr[spr] = val.u32;
 577            break;
 578
 579        case KVM_REG_SIZE_U64:
 580            env->spr[spr] = val.u64;
 581            break;
 582
 583        default:
 584            /* Don't handle this size yet */
 585            abort();
 586        }
 587    }
 588}
 589
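/*
 * Write a single SPR to KVM via KVM_SET_ONE_REG; the transfer size is
 * taken from the register id.  A failure is only traced, not fatal.
 */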
 590static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 591{
 592    PowerPCCPU *cpu = POWERPC_CPU(cs);
 593    CPUPPCState *env = &cpu->env;
 594    union {
 595        uint32_t u32;
 596        uint64_t u64;
 597    } val;
 598    struct kvm_one_reg reg = {
 599        .id = id,
 600        .addr = (uintptr_t) &val,
 601    };
 602    int ret;
 603
 604    switch (id & KVM_REG_SIZE_MASK) {
 605    case KVM_REG_SIZE_U32:
 606        val.u32 = env->spr[spr];
 607        break;
 608
 609    case KVM_REG_SIZE_U64:
 610        val.u64 = env->spr[spr];
 611        break;
 612
 613    default:
 614        /* Don't handle this size yet */
 615        abort();
 616    }
 617
 618    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 619    if (ret != 0) {
 620        trace_kvm_failed_spr_set(spr, strerror(errno));
 621    }
 622}
 623
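/*
 * Push the FP/VSX and Altivec state to KVM.  Each FPR and the low
 * doubleword of the corresponding VSX register are packed into a 128-bit
 * value and written through the VSR ONE_REG ids when VSX is available,
 * or through the FPR ids otherwise.
 */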
 624static int kvm_put_fp(CPUState *cs)
 625{
 626    PowerPCCPU *cpu = POWERPC_CPU(cs);
 627    CPUPPCState *env = &cpu->env;
 628    struct kvm_one_reg reg;
 629    int i;
 630    int ret;
 631
 632    if (env->insns_flags & PPC_FLOAT) {
 633        uint64_t fpscr = env->fpscr;
 634        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 635
 636        reg.id = KVM_REG_PPC_FPSCR;
 637        reg.addr = (uintptr_t)&fpscr;
 638        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 639        if (ret < 0) {
 640            trace_kvm_failed_fpscr_set(strerror(errno));
 641            return ret;
 642        }
 643
 644        for (i = 0; i < 32; i++) {
 645            uint64_t vsr[2];
 646            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
 647            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
 648
 649#ifdef HOST_WORDS_BIGENDIAN
 650            vsr[0] = float64_val(*fpr);
 651            vsr[1] = *vsrl;
 652#else
 653            vsr[0] = *vsrl;
 654            vsr[1] = float64_val(*fpr);
 655#endif
 656            reg.addr = (uintptr_t) &vsr;
 657            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 658
 659            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 660            if (ret < 0) {
 661                trace_kvm_failed_fp_set(vsx ? "VSR" : "FPR", i,
 662                                        strerror(errno));
 663                return ret;
 664            }
 665        }
 666    }
 667
 668    if (env->insns_flags & PPC_ALTIVEC) {
 669        reg.id = KVM_REG_PPC_VSCR;
 670        reg.addr = (uintptr_t)&env->vscr;
 671        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 672        if (ret < 0) {
 673            trace_kvm_failed_vscr_set(strerror(errno));
 674            return ret;
 675        }
 676
 677        for (i = 0; i < 32; i++) {
 678            reg.id = KVM_REG_PPC_VR(i);
 679            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
 680            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 681            if (ret < 0) {
 682                trace_kvm_failed_vr_set(i, strerror(errno));
 683                return ret;
 684            }
 685        }
 686    }
 687
 688    return 0;
 689}
 690
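/* Fetch the FP/VSX and Altivec state from KVM: the inverse of kvm_put_fp() */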
 691static int kvm_get_fp(CPUState *cs)
 692{
 693    PowerPCCPU *cpu = POWERPC_CPU(cs);
 694    CPUPPCState *env = &cpu->env;
 695    struct kvm_one_reg reg;
 696    int i;
 697    int ret;
 698
 699    if (env->insns_flags & PPC_FLOAT) {
 700        uint64_t fpscr;
 701        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 702
 703        reg.id = KVM_REG_PPC_FPSCR;
 704        reg.addr = (uintptr_t)&fpscr;
 705        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 706        if (ret < 0) {
 707            trace_kvm_failed_fpscr_get(strerror(errno));
 708            return ret;
 709        } else {
 710            env->fpscr = fpscr;
 711        }
 712
 713        for (i = 0; i < 32; i++) {
 714            uint64_t vsr[2];
 715            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
 716            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
 717
 718            reg.addr = (uintptr_t) &vsr;
 719            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 720
 721            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 722            if (ret < 0) {
 723                trace_kvm_failed_fp_get(vsx ? "VSR" : "FPR", i,
 724                                        strerror(errno));
 725                return ret;
 726            } else {
 727#ifdef HOST_WORDS_BIGENDIAN
 728                *fpr = vsr[0];
 729                if (vsx) {
 730                    *vsrl = vsr[1];
 731                }
 732#else
 733                *fpr = vsr[1];
 734                if (vsx) {
 735                    *vsrl = vsr[0];
 736                }
 737#endif
 738            }
 739        }
 740    }
 741
 742    if (env->insns_flags & PPC_ALTIVEC) {
 743        reg.id = KVM_REG_PPC_VSCR;
 744        reg.addr = (uintptr_t)&env->vscr;
 745        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 746        if (ret < 0) {
 747            trace_kvm_failed_vscr_get(strerror(errno));
 748            return ret;
 749        }
 750
 751        for (i = 0; i < 32; i++) {
 752            reg.id = KVM_REG_PPC_VR(i);
 753            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
 754            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 755            if (ret < 0) {
 756                trace_kvm_failed_vr_get(i, strerror(errno));
 757                return ret;
 758            }
 759        }
 760    }
 761
 762    return 0;
 763}
 764
 765#if defined(TARGET_PPC64)
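/*
 * Fetch the guest-registered addresses of the Virtual Processor Area,
 * SLB shadow buffer and dispatch trace log from KVM.
 */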
 766static int kvm_get_vpa(CPUState *cs)
 767{
 768    PowerPCCPU *cpu = POWERPC_CPU(cs);
 769    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
 770    struct kvm_one_reg reg;
 771    int ret;
 772
 773    reg.id = KVM_REG_PPC_VPA_ADDR;
 774    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 775    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 776    if (ret < 0) {
 777        trace_kvm_failed_vpa_addr_get(strerror(errno));
 778        return ret;
 779    }
 780
 781    assert((uintptr_t)&spapr_cpu->slb_shadow_size
 782           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 783    reg.id = KVM_REG_PPC_VPA_SLB;
 784    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 785    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 786    if (ret < 0) {
 787        trace_kvm_failed_slb_get(strerror(errno));
 788        return ret;
 789    }
 790
 791    assert((uintptr_t)&spapr_cpu->dtl_size
 792           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 793    reg.id = KVM_REG_PPC_VPA_DTL;
 794    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 795    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 796    if (ret < 0) {
 797        trace_kvm_failed_dtl_get(strerror(errno));
 798        return ret;
 799    }
 800
 801    return 0;
 802}
 803
 804static int kvm_put_vpa(CPUState *cs)
 805{
 806    PowerPCCPU *cpu = POWERPC_CPU(cs);
 807    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
 808    struct kvm_one_reg reg;
 809    int ret;
 810
 811    /*
 812     * SLB shadow or DTL can't be registered unless a master VPA is
 813     * registered.  That means when restoring state, if a VPA *is*
 814     * registered, we need to set that up first.  If not, we need to
 815     * deregister the others before deregistering the master VPA
 816     */
 817    assert(spapr_cpu->vpa_addr
 818           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
 819
 820    if (spapr_cpu->vpa_addr) {
 821        reg.id = KVM_REG_PPC_VPA_ADDR;
 822        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 823        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 824        if (ret < 0) {
 825            trace_kvm_failed_vpa_addr_set(strerror(errno));
 826            return ret;
 827        }
 828    }
 829
 830    assert((uintptr_t)&spapr_cpu->slb_shadow_size
 831           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 832    reg.id = KVM_REG_PPC_VPA_SLB;
 833    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 834    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 835    if (ret < 0) {
 836        trace_kvm_failed_slb_set(strerror(errno));
 837        return ret;
 838    }
 839
 840    assert((uintptr_t)&spapr_cpu->dtl_size
 841           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 842    reg.id = KVM_REG_PPC_VPA_DTL;
 843    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 844    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 845    if (ret < 0) {
 846        trace_kvm_failed_dtl_set(strerror(errno));
 847        return ret;
 848    }
 849
 850    if (!spapr_cpu->vpa_addr) {
 851        reg.id = KVM_REG_PPC_VPA_ADDR;
 852        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 853        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 854        if (ret < 0) {
 855            trace_kvm_failed_null_vpa_addr_set(strerror(errno));
 856            return ret;
 857        }
 858    }
 859
 860    return 0;
 861}
 862#endif /* TARGET_PPC64 */
 863
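/*
 * Push the book3s MMU state (SDR1 or the vhyp-provided HPT encoding, the
 * SLB, segment registers and BATs) to KVM.
 */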
 864int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 865{
 866    CPUPPCState *env = &cpu->env;
 867    struct kvm_sregs sregs;
 868    int i;
 869
 870    sregs.pvr = env->spr[SPR_PVR];
 871
 872    if (cpu->vhyp) {
 873        PPCVirtualHypervisorClass *vhc =
 874            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 875        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 876    } else {
 877        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 878    }
 879
 880    /* Sync SLB */
 881#ifdef TARGET_PPC64
 882    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 883        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 884        if (env->slb[i].esid & SLB_ESID_V) {
 885            sregs.u.s.ppc64.slb[i].slbe |= i;
 886        }
 887        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 888    }
 889#endif
 890
 891    /* Sync SRs */
 892    for (i = 0; i < 16; i++) {
 893        sregs.u.s.ppc32.sr[i] = env->sr[i];
 894    }
 895
 896    /* Sync BATs */
 897    for (i = 0; i < 8; i++) {
 898        /* Beware. We have to swap upper and lower bits here */
 899        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 900            | env->DBAT[1][i];
 901        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 902            | env->IBAT[1][i];
 903    }
 904
 905    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 906}
 907
 908int kvm_arch_put_registers(CPUState *cs, int level)
 909{
 910    PowerPCCPU *cpu = POWERPC_CPU(cs);
 911    CPUPPCState *env = &cpu->env;
 912    struct kvm_regs regs;
 913    int ret;
 914    int i;
 915
 916    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 917    if (ret < 0) {
 918        return ret;
 919    }
 920
 921    regs.ctr = env->ctr;
 922    regs.lr  = env->lr;
 923    regs.xer = cpu_read_xer(env);
 924    regs.msr = env->msr;
 925    regs.pc = env->nip;
 926
 927    regs.srr0 = env->spr[SPR_SRR0];
 928    regs.srr1 = env->spr[SPR_SRR1];
 929
 930    regs.sprg0 = env->spr[SPR_SPRG0];
 931    regs.sprg1 = env->spr[SPR_SPRG1];
 932    regs.sprg2 = env->spr[SPR_SPRG2];
 933    regs.sprg3 = env->spr[SPR_SPRG3];
 934    regs.sprg4 = env->spr[SPR_SPRG4];
 935    regs.sprg5 = env->spr[SPR_SPRG5];
 936    regs.sprg6 = env->spr[SPR_SPRG6];
 937    regs.sprg7 = env->spr[SPR_SPRG7];
 938
 939    regs.pid = env->spr[SPR_BOOKE_PID];
 940
 941    for (i = 0; i < 32; i++) {
 942        regs.gpr[i] = env->gpr[i];
 943    }
 944
 945    regs.cr = 0;
 946    for (i = 0; i < 8; i++) {
 947        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 948    }
 949
 950    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 951    if (ret < 0) {
 952        return ret;
 953    }
 954
 955    kvm_put_fp(cs);
 956
 957    if (env->tlb_dirty) {
 958        kvm_sw_tlb_put(cpu);
 959        env->tlb_dirty = false;
 960    }
 961
 962    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 963        ret = kvmppc_put_books_sregs(cpu);
 964        if (ret < 0) {
 965            return ret;
 966        }
 967    }
 968
 969    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 970        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 971    }
 972
 973    if (cap_one_reg) {
 974        int i;
 975
        /*
         * We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
         */
 982        for (i = 0; i < 1024; i++) {
 983            uint64_t id = env->spr_cb[i].one_reg_id;
 984
 985            if (id != 0) {
 986                kvm_put_one_spr(cs, id, i);
 987            }
 988        }
 989
 990#ifdef TARGET_PPC64
 991        if (msr_ts) {
 992            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 993                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 994            }
 995            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 996                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 997            }
 998            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 999            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1000            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1001            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1002            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1003            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1004            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1005            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1006            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1007            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1008        }
1009
1010        if (cap_papr) {
1011            if (kvm_put_vpa(cs) < 0) {
1012                trace_kvm_failed_put_vpa();
1013            }
1014        }
1015
1016        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1017#endif /* TARGET_PPC64 */
1018    }
1019
1020    return ret;
1021}
1022
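/* Recompute a BookE exception vector from its IVOR and the IVPR base */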
1023static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1024{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1026}
1027
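/* Fetch the BookE sregs from KVM and scatter them into env->spr[] */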
1028static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1029{
1030    CPUPPCState *env = &cpu->env;
1031    struct kvm_sregs sregs;
1032    int ret;
1033
1034    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1035    if (ret < 0) {
1036        return ret;
1037    }
1038
1039    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1040        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1041        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1042        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1043        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1044        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1045        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1046        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1047        env->spr[SPR_DECR] = sregs.u.e.dec;
1048        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1049        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1050        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1051    }
1052
1053    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1054        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1055        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1056        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1057        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1058        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1059    }
1060
1061    if (sregs.u.e.features & KVM_SREGS_E_64) {
1062        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1063    }
1064
1065    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1066        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1067    }
1068
1069    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1070        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1071        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1072        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1073        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1074        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1075        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1076        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1077        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1078        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1079        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1080        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1081        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1082        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1083        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1084        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1085        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1086        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1087        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1088        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1089        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1090        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1091        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1092        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1093        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1094        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1095        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1096        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1097        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1098        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1099        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1100        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1101        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1102
1103        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1104            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1105            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1106            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1107            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1108            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1109            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1110        }
1111
1112        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1113            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1114            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1115        }
1116
1117        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1118            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1119            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1120            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1121            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1122        }
1123    }
1124
1125    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1126        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1127        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1128        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1129        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1130        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1131        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1132        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1133        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1134        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1135        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1136    }
1137
1138    if (sregs.u.e.features & KVM_SREGS_EXP) {
1139        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1140    }
1141
1142    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1143        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1144        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1145    }
1146
1147    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1148        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1149        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1150        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1151
1152        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1153            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1154            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1155        }
1156    }
1157
1158    return 0;
1159}
1160
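/* Fetch the book3s MMU state (SDR1, SLB, segment registers, BATs) from KVM */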
1161static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1162{
1163    CPUPPCState *env = &cpu->env;
1164    struct kvm_sregs sregs;
1165    int ret;
1166    int i;
1167
1168    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1169    if (ret < 0) {
1170        return ret;
1171    }
1172
1173    if (!cpu->vhyp) {
1174        ppc_store_sdr1(env, sregs.u.s.sdr1);
1175    }
1176
1177    /* Sync SLB */
1178#ifdef TARGET_PPC64
1179    /*
1180     * The packed SLB array we get from KVM_GET_SREGS only contains
1181     * information about valid entries. So we flush our internal copy
1182     * to get rid of stale ones, then put all valid SLB entries back
1183     * in.
1184     */
1185    memset(env->slb, 0, sizeof(env->slb));
1186    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1187        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1188        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1189        /*
1190         * Only restore valid entries
1191         */
1192        if (rb & SLB_ESID_V) {
1193            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1194        }
1195    }
1196#endif
1197
1198    /* Sync SRs */
1199    for (i = 0; i < 16; i++) {
1200        env->sr[i] = sregs.u.s.ppc32.sr[i];
1201    }
1202
1203    /* Sync BATs */
1204    for (i = 0; i < 8; i++) {
1205        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1206        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1207        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1208        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1209    }
1210
1211    return 0;
1212}
1213
1214int kvm_arch_get_registers(CPUState *cs)
1215{
1216    PowerPCCPU *cpu = POWERPC_CPU(cs);
1217    CPUPPCState *env = &cpu->env;
1218    struct kvm_regs regs;
1219    uint32_t cr;
1220    int i, ret;
1221
1222    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1223    if (ret < 0) {
1224        return ret;
1225    }
1226
1227    cr = regs.cr;
1228    for (i = 7; i >= 0; i--) {
1229        env->crf[i] = cr & 15;
1230        cr >>= 4;
1231    }
1232
1233    env->ctr = regs.ctr;
1234    env->lr = regs.lr;
1235    cpu_write_xer(env, regs.xer);
1236    env->msr = regs.msr;
1237    env->nip = regs.pc;
1238
1239    env->spr[SPR_SRR0] = regs.srr0;
1240    env->spr[SPR_SRR1] = regs.srr1;
1241
1242    env->spr[SPR_SPRG0] = regs.sprg0;
1243    env->spr[SPR_SPRG1] = regs.sprg1;
1244    env->spr[SPR_SPRG2] = regs.sprg2;
1245    env->spr[SPR_SPRG3] = regs.sprg3;
1246    env->spr[SPR_SPRG4] = regs.sprg4;
1247    env->spr[SPR_SPRG5] = regs.sprg5;
1248    env->spr[SPR_SPRG6] = regs.sprg6;
1249    env->spr[SPR_SPRG7] = regs.sprg7;
1250
1251    env->spr[SPR_BOOKE_PID] = regs.pid;
1252
1253    for (i = 0; i < 32; i++) {
1254        env->gpr[i] = regs.gpr[i];
1255    }
1256
1257    kvm_get_fp(cs);
1258
1259    if (cap_booke_sregs) {
1260        ret = kvmppc_get_booke_sregs(cpu);
1261        if (ret < 0) {
1262            return ret;
1263        }
1264    }
1265
1266    if (cap_segstate) {
1267        ret = kvmppc_get_books_sregs(cpu);
1268        if (ret < 0) {
1269            return ret;
1270        }
1271    }
1272
1273    if (cap_hior) {
1274        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1275    }
1276
1277    if (cap_one_reg) {
1278        int i;
1279
        /*
         * We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
         */
1286        for (i = 0; i < 1024; i++) {
1287            uint64_t id = env->spr_cb[i].one_reg_id;
1288
1289            if (id != 0) {
1290                kvm_get_one_spr(cs, id, i);
1291            }
1292        }
1293
1294#ifdef TARGET_PPC64
1295        if (msr_ts) {
1296            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1297                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1298            }
1299            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1300                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1301            }
1302            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1303            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1304            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1305            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1306            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1307            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1308            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1309            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1310            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1311            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1312        }
1313
1314        if (cap_papr) {
1315            if (kvm_get_vpa(cs) < 0) {
1316                trace_kvm_failed_get_vpa();
1317            }
1318        }
1319
1320        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1321#endif
1322    }
1323
1324    return 0;
1325}
1326
1327int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1328{
1329    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1330
1331    if (irq != PPC_INTERRUPT_EXT) {
1332        return 0;
1333    }
1334
1335    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1336        return 0;
1337    }
1338
1339    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1340
1341    return 0;
1342}
1343
1344#if defined(TARGET_PPC64)
1345#define PPC_INPUT_INT PPC970_INPUT_INT
1346#else
1347#define PPC_INPUT_INT PPC6xx_INPUT_INT
1348#endif
1349
1350void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1351{
1352    PowerPCCPU *cpu = POWERPC_CPU(cs);
1353    CPUPPCState *env = &cpu->env;
1354    int r;
1355    unsigned irq;
1356
1357    qemu_mutex_lock_iothread();
1358
1359    /*
1360     * PowerPC QEMU tracks the various core input pins (interrupt,
1361     * critical interrupt, reset, etc) in PPC-specific
1362     * env->irq_input_state.
1363     */
1364    if (!cap_interrupt_level &&
1365        run->ready_for_interrupt_injection &&
1366        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1367        (env->irq_input_state & (1 << PPC_INPUT_INT)))
1368    {
1369        /*
1370         * For now KVM disregards the 'irq' argument. However, in the
1371         * future KVM could cache it in-kernel to avoid a heavyweight
1372         * exit when reading the UIC.
1373         */
1374        irq = KVM_INTERRUPT_SET;
1375
1376        trace_kvm_injected_interrupt(irq);
1377        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1378        if (r < 0) {
            printf("cpu %d: failed to inject interrupt %x\n",
                   cs->cpu_index, irq);
1380        }
1381
1382        /* Always wake up soon in case the interrupt was level based */
1383        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1384                       (NANOSECONDS_PER_SECOND / 50));
1385    }
1386
    /*
     * We don't know if there are more interrupts pending after
     * this. However, the guest will return to userspace in the course
     * of handling this one anyway, so we will get a chance to
     * deliver the rest.
     */
1393
1394    qemu_mutex_unlock_iothread();
1395}
1396
1397MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1398{
1399    return MEMTXATTRS_UNSPECIFIED;
1400}
1401
1402int kvm_arch_process_async_events(CPUState *cs)
1403{
1404    return cs->halted;
1405}
1406
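/*
 * KVM_EXIT_HLT: halt the vCPU when external interrupts are enabled
 * (MSR_EE set) and no hard interrupt is currently pending.
 */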
1407static int kvmppc_handle_halt(PowerPCCPU *cpu)
1408{
1409    CPUState *cs = CPU(cpu);
1410    CPUPPCState *env = &cpu->env;
1411
1412    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1413        cs->halted = 1;
1414        cs->exception_index = EXCP_HLT;
1415    }
1416
1417    return 0;
1418}
1419
/* Map DCR accesses to the existing QEMU DCR emulation */
1421static int kvmppc_handle_dcr_read(CPUPPCState *env,
1422                                  uint32_t dcrn, uint32_t *data)
1423{
1424    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1426    }
1427
1428    return 0;
1429}
1430
1431static int kvmppc_handle_dcr_write(CPUPPCState *env,
1432                                   uint32_t dcrn, uint32_t data)
1433{
1434    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
1435        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1436    }
1437
1438    return 0;
1439}
1440
1441int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1442{
1443    /* Mixed endian case is not handled */
1444    uint32_t sc = debug_inst_opcode;
1445
1446    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1447                            sizeof(sc), 0) ||
1448        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1449        return -EINVAL;
1450    }
1451
1452    return 0;
1453}
1454
1455int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1456{
1457    uint32_t sc;
1458
1459    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1460        sc != debug_inst_opcode ||
1461        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1462                            sizeof(sc), 1)) {
1463        return -EINVAL;
1464    }
1465
1466    return 0;
1467}
1468
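/* Return the index of the debug point matching addr and type, or -1 if none */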
1469static int find_hw_breakpoint(target_ulong addr, int type)
1470{
1471    int n;
1472
1473    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1474           <= ARRAY_SIZE(hw_debug_points));
1475
1476    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1477        if (hw_debug_points[n].addr == addr &&
1478             hw_debug_points[n].type == type) {
1479            return n;
1480        }
1481    }
1482
1483    return -1;
1484}
1485
1486static int find_hw_watchpoint(target_ulong addr, int *flag)
1487{
1488    int n;
1489
1490    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1491    if (n >= 0) {
1492        *flag = BP_MEM_ACCESS;
1493        return n;
1494    }
1495
1496    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1497    if (n >= 0) {
1498        *flag = BP_MEM_WRITE;
1499        return n;
1500    }
1501
1502    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1503    if (n >= 0) {
1504        *flag = BP_MEM_READ;
1505        return n;
1506    }
1507
1508    return -1;
1509}
1510
1511int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1512                                  target_ulong len, int type)
1513{
1514    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1515        return -ENOBUFS;
1516    }
1517
1518    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1519    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1520
1521    switch (type) {
1522    case GDB_BREAKPOINT_HW:
1523        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1524            return -ENOBUFS;
1525        }
1526
1527        if (find_hw_breakpoint(addr, type) >= 0) {
1528            return -EEXIST;
1529        }
1530
1531        nb_hw_breakpoint++;
1532        break;
1533
1534    case GDB_WATCHPOINT_WRITE:
1535    case GDB_WATCHPOINT_READ:
1536    case GDB_WATCHPOINT_ACCESS:
1537        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1538            return -ENOBUFS;
1539        }
1540
1541        if (find_hw_breakpoint(addr, type) >= 0) {
1542            return -EEXIST;
1543        }
1544
1545        nb_hw_watchpoint++;
1546        break;
1547
1548    default:
1549        return -ENOSYS;
1550    }
1551
1552    return 0;
1553}
1554
1555int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1556                                  target_ulong len, int type)
1557{
1558    int n;
1559
1560    n = find_hw_breakpoint(addr, type);
1561    if (n < 0) {
1562        return -ENOENT;
1563    }
1564
1565    switch (type) {
1566    case GDB_BREAKPOINT_HW:
1567        nb_hw_breakpoint--;
1568        break;
1569
1570    case GDB_WATCHPOINT_WRITE:
1571    case GDB_WATCHPOINT_READ:
1572    case GDB_WATCHPOINT_ACCESS:
1573        nb_hw_watchpoint--;
1574        break;
1575
1576    default:
1577        return -ENOSYS;
1578    }
1579    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1580
1581    return 0;
1582}
1583
1584void kvm_arch_remove_all_hw_breakpoints(void)
1585{
1586    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1587}
1588
1589void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1590{
1591    int n;
1592
1593    /* Software Breakpoint updates */
1594    if (kvm_sw_breakpoints_active(cs)) {
1595        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1596    }
1597
1598    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1599           <= ARRAY_SIZE(hw_debug_points));
1600    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1601
1602    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1603        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1604        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1605        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1606            switch (hw_debug_points[n].type) {
1607            case GDB_BREAKPOINT_HW:
1608                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1609                break;
1610            case GDB_WATCHPOINT_WRITE:
1611                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1612                break;
1613            case GDB_WATCHPOINT_READ:
1614                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1615                break;
1616            case GDB_WATCHPOINT_ACCESS:
1617                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1618                                        KVMPPC_DEBUG_WATCH_READ;
1619                break;
1620            default:
1621                cpu_abort(cs, "Unsupported breakpoint type\n");
1622            }
1623            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1624        }
1625    }
1626}
1627
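/*
 * Check whether a KVM debug exit matches one of our hardware breakpoints or
 * watchpoints; if it does, return non-zero and record the watchpoint hit.
 */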
1628static int kvm_handle_hw_breakpoint(CPUState *cs,
1629                                    struct kvm_debug_exit_arch *arch_info)
1630{
1631    int handle = 0;
1632    int n;
1633    int flag = 0;
1634
1635    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1636        if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1637            n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1638            if (n >= 0) {
1639                handle = 1;
1640            }
1641        } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1642                                        KVMPPC_DEBUG_WATCH_WRITE)) {
1643            n = find_hw_watchpoint(arch_info->address,  &flag);
1644            if (n >= 0) {
1645                handle = 1;
1646                cs->watchpoint_hit = &hw_watchpoint;
1647                hw_watchpoint.vaddr = hw_debug_points[n].addr;
1648                hw_watchpoint.flags = flag;
1649            }
1650        }
1651    }
1652    return handle;
1653}
1654
1655static int kvm_handle_singlestep(void)
1656{
1657    return 1;
1658}
1659
1660static int kvm_handle_sw_breakpoint(void)
1661{
1662    return 1;
1663}
1664
1665static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1666{
1667    CPUState *cs = CPU(cpu);
1668    CPUPPCState *env = &cpu->env;
1669    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1670
1671    if (cs->singlestep_enabled) {
1672        return kvm_handle_singlestep();
1673    }
1674
1675    if (arch_info->status) {
1676        return kvm_handle_hw_breakpoint(cs, arch_info);
1677    }
1678
1679    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1680        return kvm_handle_sw_breakpoint();
1681    }
1682
    /*
     * QEMU is not able to handle the debug exception, so inject a
     * program exception into the guest instead.
     * Yes, a program exception, NOT a debug exception !!
     * When QEMU is using the debug resources, the debug exception must
     * always be enabled. To achieve this we set MSR_DE and also set
     * MSRP_DEP so the guest cannot change MSR_DE.
     * When emulating debug resources for the guest we want the guest
     * to control MSR_DE (enable/disable the debug interrupt on demand).
     * Supporting both configurations at once is NOT possible, so the
     * result is that we cannot share debug resources between QEMU and
     * the guest on the BookE architecture.
     * In the current design QEMU gets priority over the guest: if QEMU
     * is using the debug resources then the guest cannot use them.
     * For software breakpoints QEMU uses a privileged instruction, so
     * there is no way we can be here because the guest raised a debug
     * exception; the only possibility is that the guest executed a
     * privileged / illegal instruction, and that's why we are
     * injecting a program interrupt.
     */
1704    cpu_synchronize_state(cs);
1705    /*
1706     * env->nip is the PC, so increment it by 4 in order to use
1707     * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1708     */
1709    env->nip += 4;
1710    cs->exception_index = POWERPC_EXCP_PROGRAM;
1711    env->error_code = POWERPC_EXCP_INVAL;
1712    ppc_cpu_do_interrupt(cs);
1713
1714    return 0;
1715}
1716
1717int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1718{
1719    PowerPCCPU *cpu = POWERPC_CPU(cs);
1720    CPUPPCState *env = &cpu->env;
1721    int ret;
1722
1723    qemu_mutex_lock_iothread();
1724
1725    switch (run->exit_reason) {
1726    case KVM_EXIT_DCR:
1727        if (run->dcr.is_write) {
1728            trace_kvm_handle_dcr_write();
1729            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1730        } else {
1731            trace_kvm_handle_dcr_read();
1732            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1733        }
1734        break;
1735    case KVM_EXIT_HLT:
1736        trace_kvm_handle_halt();
1737        ret = kvmppc_handle_halt(cpu);
1738        break;
1739#if defined(TARGET_PPC64)
1740    case KVM_EXIT_PAPR_HCALL:
1741        trace_kvm_handle_papr_hcall();
1742        run->papr_hcall.ret = spapr_hypercall(cpu,
1743                                              run->papr_hcall.nr,
1744                                              run->papr_hcall.args);
1745        ret = 0;
1746        break;
1747#endif
1748    case KVM_EXIT_EPR:
1749        trace_kvm_handle_epr();
1750        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1751        ret = 0;
1752        break;
1753    case KVM_EXIT_WATCHDOG:
1754        trace_kvm_handle_watchdog_expiry();
1755        watchdog_perform_action();
1756        ret = 0;
1757        break;
1758
1759    case KVM_EXIT_DEBUG:
1760        trace_kvm_handle_debug_exception();
1761        if (kvm_handle_debug(cpu, run)) {
1762            ret = EXCP_DEBUG;
1763            break;
1764        }
1765        /* re-enter, this exception was guest-internal */
1766        ret = 0;
1767        break;
1768
1769    default:
1770        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1771        ret = -1;
1772        break;
1773    }
1774
1775    qemu_mutex_unlock_iothread();
1776    return ret;
1777}
1778
1779int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1780{
1781    CPUState *cs = CPU(cpu);
1782    uint32_t bits = tsr_bits;
1783    struct kvm_one_reg reg = {
1784        .id = KVM_REG_PPC_OR_TSR,
1785        .addr = (uintptr_t) &bits,
1786    };
1787
1788    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1789}
1790
1791int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1792{
1794    CPUState *cs = CPU(cpu);
1795    uint32_t bits = tsr_bits;
1796    struct kvm_one_reg reg = {
1797        .id = KVM_REG_PPC_CLEAR_TSR,
1798        .addr = (uintptr_t) &bits,
1799    };
1800
1801    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1802}
1803
1804int kvmppc_set_tcr(PowerPCCPU *cpu)
1805{
1806    CPUState *cs = CPU(cpu);
1807    CPUPPCState *env = &cpu->env;
1808    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1809
1810    struct kvm_one_reg reg = {
1811        .id = KVM_REG_PPC_TCR,
1812        .addr = (uintptr_t) &tcr,
1813    };
1814
1815    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1816}
1817
1818int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1819{
1820    CPUState *cs = CPU(cpu);
1821    int ret;
1822
1823    if (!kvm_enabled()) {
1824        return -1;
1825    }
1826
1827    if (!cap_ppc_watchdog) {
1828        printf("warning: KVM does not support watchdog\n");
1829        return -1;
1830    }
1831
1832    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1833    if (ret < 0) {
1834        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1835                __func__, strerror(-ret));
1836        return ret;
1837    }
1838
1839    return ret;
1840}
1841
1842static int read_cpuinfo(const char *field, char *value, int len)
1843{
1844    FILE *f;
1845    int ret = -1;
1846    int field_len = strlen(field);
1847    char line[512];
1848
1849    f = fopen("/proc/cpuinfo", "r");
1850    if (!f) {
1851        return -1;
1852    }
1853
1854    do {
1855        if (!fgets(line, sizeof(line), f)) {
1856            break;
1857        }
1858        if (!strncmp(line, field, field_len)) {
1859            pstrcpy(value, len, line);
1860            ret = 0;
1861            break;
1862        }
1863    } while (*line);
1864
1865    fclose(f);
1866
1867    return ret;
1868}
1869
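    /*
     * Parse the host timebase frequency out of /proc/cpuinfo.  On ppc
     * hosts the line typically looks like "timebase : 512000000";
     * everything after the ':' is handed to atoi(), so only the leading
     * integer is used.  Falls back to NANOSECONDS_PER_SECOND if the
     * field is missing.
     */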
1870uint32_t kvmppc_get_tbfreq(void)
1871{
1872    char line[512];
1873    char *ns;
1874    uint32_t retval = NANOSECONDS_PER_SECOND;
1875
1876    if (read_cpuinfo("timebase", line, sizeof(line))) {
1877        return retval;
1878    }
1879
1880    ns = strchr(line, ':');
1881    if (!ns) {
1882        return retval;
1883    }
1884
1885    ns++;
1886
1887    return atoi(ns);
1888}
1889
1890bool kvmppc_get_host_serial(char **value)
1891{
1892    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1893                               NULL);
1894}
1895
1896bool kvmppc_get_host_model(char **value)
1897{
1898    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1899}
1900
1901/* Try to find a device tree node for a CPU with clock-frequency property */
1902static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1903{
1904    struct dirent *dirp;
1905    DIR *dp;
1906
1907    dp = opendir(PROC_DEVTREE_CPU);
1908    if (!dp) {
1909        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1910        return -1;
1911    }
1912
1913    buf[0] = '\0';
1914    while ((dirp = readdir(dp)) != NULL) {
1915        FILE *f;
1916        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1917                 dirp->d_name);
1918        f = fopen(buf, "r");
1919        if (f) {
1920            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1921            fclose(f);
1922            break;
1923        }
1924        buf[0] = '\0';
1925    }
1926    closedir(dp);
1927    if (buf[0] == '\0') {
1928        printf("Unknown host!\n");
1929        return -1;
1930    }
1931
1932    return 0;
1933}
1934
1935static uint64_t kvmppc_read_int_dt(const char *filename)
1936{
1937    union {
1938        uint32_t v32;
1939        uint64_t v64;
1940    } u;
1941    FILE *f;
1942    int len;
1943
1944    f = fopen(filename, "rb");
1945    if (!f) {
1946        return -1;
1947    }
1948
1949    len = fread(&u, 1, sizeof(u), f);
1950    fclose(f);
1951    switch (len) {
1952    case 4:
1953        /* property is a 32-bit quantity */
1954        return be32_to_cpu(u.v32);
1955    case 8:
1956        return be64_to_cpu(u.v64);
1957    }
1958
1959    return 0;
1960}
1961
1962/*
1963 * Read a CPU node property from the host device tree that's a single
1964 * integer (32-bit or 64-bit).  Returns (uint64_t)-1 if the node or
1965 * property can't be found or opened, and 0 if the format isn't understood.
1966 */
1967static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1968{
1969    char buf[PATH_MAX], *tmp;
1970    uint64_t val;
1971
1972    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1973        return -1;
1974    }
1975
1976    tmp = g_strdup_printf("%s/%s", buf, propname);
1977    val = kvmppc_read_int_dt(tmp);
1978    g_free(tmp);
1979
1980    return val;
1981}
1982
1983uint64_t kvmppc_get_clockfreq(void)
1984{
1985    return kvmppc_read_int_cpu_dt("clock-frequency");
1986}
1987
1988static int kvmppc_get_dec_bits(void)
1989{
1990    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
1991
1992    if (nr_bits > 0) {
1993        return nr_bits;
1994    }
1995    return 0;
1996}
1997
1998static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1999{
2000    CPUState *cs = env_cpu(env);
2001
2002    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2003        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2004        return 0;
2005    }
2006
2007    return 1;
2008}
2009
2010int kvmppc_get_hasidle(CPUPPCState *env)
2011{
2012    struct kvm_ppc_pvinfo pvinfo;
2013
2014    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2015        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2016        return 1;
2017    }
2018
2019    return 0;
2020}
2021
2022int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2023{
2024    uint32_t *hc = (uint32_t *)buf;
2025    struct kvm_ppc_pvinfo pvinfo;
2026
2027    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2028        memcpy(buf, pvinfo.hcall, buf_len);
2029        return 0;
2030    }
2031
2032    /*
2033     * Fall back to hypercalls that always fail, regardless of endianness:
2034     *
2035     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2036     *     li r3, -1
2037     *     b .+8       (becomes nop in wrong endian)
2038     *     bswap32(li r3, -1)
2039     */
2040
2041    hc[0] = cpu_to_be32(0x08000048);
2042    hc[1] = cpu_to_be32(0x3860ffff);
2043    hc[2] = cpu_to_be32(0x48000008);
2044    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2045
2046    return 1;
2047}
2048
2049static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2050{
2051    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2052}
2053
2054void kvmppc_enable_logical_ci_hcalls(void)
2055{
2056    /*
2057     * FIXME: it would be nice if we could detect the cases where
2058     * we're using a device which requires the in-kernel
2059     * implementation of these hcalls but the kernel lacks it, and
2060     * produce a warning.
2061     */
2062    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2063    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2064}
2065
2066void kvmppc_enable_set_mode_hcall(void)
2067{
2068    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2069}
2070
2071void kvmppc_enable_clear_ref_mod_hcalls(void)
2072{
2073    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2074    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2075}
2076
2077void kvmppc_enable_h_page_init(void)
2078{
2079    kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
2080}
2081
2082void kvmppc_set_papr(PowerPCCPU *cpu)
2083{
2084    CPUState *cs = CPU(cpu);
2085    int ret;
2086
2087    if (!kvm_enabled()) {
2088        return;
2089    }
2090
2091    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2092    if (ret) {
2093        error_report("This vCPU type or KVM version does not support PAPR");
2094        exit(1);
2095    }
2096
2097    /*
2098     * Update the capability flag so we sync the right information
2099     * with kvm
2100     */
2101    cap_papr = 1;
2102}
2103
2104int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2105{
2106    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2107}
2108
2109void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2110{
2111    CPUState *cs = CPU(cpu);
2112    int ret;
2113
2114    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2115    if (ret && mpic_proxy) {
2116        error_report("This KVM version does not support EPR");
2117        exit(1);
2118    }
2119}
2120
2121int kvmppc_smt_threads(void)
2122{
2123    return cap_ppc_smt ? cap_ppc_smt : 1;
2124}
2125
2126int kvmppc_set_smt_threads(int smt)
2127{
2128    int ret;
2129
2130    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2131    if (!ret) {
2132        cap_ppc_smt = smt;
2133    }
2134    return ret;
2135}
2136
2137void kvmppc_hint_smt_possible(Error **errp)
2138{
2139    int i;
2140    GString *g;
2141    char *s;
2142
2143    assert(kvm_enabled());
2144    if (cap_ppc_smt_possible) {
2145        g = g_string_new("Available VSMT modes:");
2146        for (i = 63; i >= 0; i--) {
2147            if ((1UL << i) & cap_ppc_smt_possible) {
2148                g_string_append_printf(g, " %lu", (1UL << i));
2149            }
2150        }
2151        s = g_string_free(g, false);
2152        error_append_hint(errp, "%s.\n", s);
2153        g_free(s);
2154    } else {
2155        error_append_hint(errp,
2156                          "This KVM seems to be too old to support VSMT.\n");
2157    }
2158}
2159
2160
2161#ifdef TARGET_PPC64
2162uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2163{
2164    struct kvm_ppc_smmu_info info;
2165    long rampagesize, best_page_shift;
2166    int i;
2167
2168    /*
2169     * Find the largest hardware supported page size that's less than
2170     * or equal to the (logical) backing page size of guest RAM
2171     */
2172    kvm_get_smmu_info(&info, &error_fatal);
2173    rampagesize = qemu_minrampagesize();
2174    best_page_shift = 0;
2175
2176    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2177        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2178
2179        if (!sps->page_shift) {
2180            continue;
2181        }
2182
2183        if ((sps->page_shift > best_page_shift)
2184            && ((1UL << sps->page_shift) <= rampagesize)) {
2185            best_page_shift = sps->page_shift;
2186        }
2187    }
2188
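        /*
         * hash_shift is log2 of the HPT size in bytes; a PTEG is 128
         * bytes (8 HPTEs of 16 bytes each), so 2^(hash_shift - 7) is
         * the number of PTEGs in the table.  The cap below therefore
         * limits the RMA to one backing page per PTEG, presumably so
         * the VRMA mapping can never need more PTEGs than the HPT has.
         */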
2189    return MIN(current_size,
2190               1ULL << (best_page_shift + hash_shift - 7));
2191}
2192#endif
2193
2194bool kvmppc_spapr_use_multitce(void)
2195{
2196    return cap_spapr_multitce;
2197}
2198
2199int kvmppc_spapr_enable_inkernel_multitce(void)
2200{
2201    int ret;
2202
2203    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2204                            H_PUT_TCE_INDIRECT, 1);
2205    if (!ret) {
2206        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207                                H_STUFF_TCE, 1);
2208    }
2209
2210    return ret;
2211}
2212
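    /*
     * Create an in-kernel TCE table for the given LIOBN and mmap() it
     * into QEMU's address space.  Returns the mapped table and, via
     * *pfd, the fd to be used when removing it; returns NULL if
     * in-kernel TCE tables are unavailable (or unusable with VFIO) or
     * the parameters can't be expressed through the old 32-bit ioctl.
     */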
2213void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2214                              uint64_t bus_offset, uint32_t nb_table,
2215                              int *pfd, bool need_vfio)
2216{
2217    long len;
2218    int fd;
2219    void *table;
2220
2221    /*
2222     * Must set fd to -1 so we don't try to munmap when called for
2223     * destroying the table, which the upper layers -will- do
2224     */
2225    *pfd = -1;
2226    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2227        return NULL;
2228    }
2229
2230    if (cap_spapr_tce_64) {
2231        struct kvm_create_spapr_tce_64 args = {
2232            .liobn = liobn,
2233            .page_shift = page_shift,
2234            .offset = bus_offset >> page_shift,
2235            .size = nb_table,
2236            .flags = 0
2237        };
2238        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2239        if (fd < 0) {
2240            fprintf(stderr,
2241                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2242                    liobn);
2243            return NULL;
2244        }
2245    } else if (cap_spapr_tce) {
2246        uint64_t window_size = (uint64_t) nb_table << page_shift;
2247        struct kvm_create_spapr_tce args = {
2248            .liobn = liobn,
2249            .window_size = window_size,
2250        };
2251        if ((window_size != args.window_size) || bus_offset) {
2252            return NULL;
2253        }
2254        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2255        if (fd < 0) {
2256            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2257                    liobn);
2258            return NULL;
2259        }
2260    } else {
2261        return NULL;
2262    }
2263
2264    len = nb_table * sizeof(uint64_t);
2265    /* FIXME: round this up to page size */
2266
2267    table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2268    if (table == MAP_FAILED) {
2269        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2270                liobn);
2271        close(fd);
2272        return NULL;
2273    }
2274
2275    *pfd = fd;
2276    return table;
2277}
2278
2279int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2280{
2281    long len;
2282
2283    if (fd < 0) {
2284        return -1;
2285    }
2286
2287    len = nb_table * sizeof(uint64_t);
2288    if ((munmap(table, len) < 0) ||
2289        (close(fd) < 0)) {
2290        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2291                strerror(errno));
2292        /* Leak the table */
2293    }
2294
2295    return 0;
2296}
2297
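    /*
     * Returns the log2 size of the HPT that KVM has allocated, or 0 to
     * tell the caller that QEMU must allocate the hash table itself
     * (full emulation or PR KVM), or a negative errno on failure.
     */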
2298int kvmppc_reset_htab(int shift_hint)
2299{
2300    uint32_t shift = shift_hint;
2301
2302    if (!kvm_enabled()) {
2303        /* Full emulation, tell caller to allocate htab itself */
2304        return 0;
2305    }
2306    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2307        int ret;
2308        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2309        if (ret == -ENOTTY) {
2310            /*
2311             * At least some versions of PR KVM advertise the
2312             * capability, but don't implement the ioctl().  Oops.
2313             * Return 0 so that we allocate the htab in qemu, as is
2314             * correct for PR.
2315             */
2316            return 0;
2317        } else if (ret < 0) {
2318            return ret;
2319        }
2320        return shift;
2321    }
2322
2323    /*
2324     * We have a kernel that predates the htab reset calls.  For PR
2325     * KVM, we need to allocate the htab ourselves; an HV KVM of this
2326     * era will already have allocated a fixed 16MB hash table for
2327     * us.
2328     */
2329    if (kvmppc_is_pr(kvm_state)) {
2330        /* PR - tell caller to allocate htab */
2331        return 0;
2332    } else {
2333        /* HV - assume 16MB kernel allocated htab */
2334        return 24;
2335    }
2336}
2337
2338static inline uint32_t mfpvr(void)
2339{
2340    uint32_t pvr;
2341
2342    asm ("mfpvr %0"
2343         : "=r"(pvr));
2344    return pvr;
2345}
2346
2347static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2348{
2349    if (on) {
2350        *word |= flags;
2351    } else {
2352        *word &= ~flags;
2353    }
2354}
2355
2356static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2357{
2358    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2359    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2360    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2361
2362    /* Now fix up the class with information we can query from the host */
2363    pcc->pvr = mfpvr();
2364
2365    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2366                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2367    alter_insns(&pcc->insns_flags2, PPC2_VSX,
2368                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2369    alter_insns(&pcc->insns_flags2, PPC2_DFP,
2370                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2371
2372    if (dcache_size != -1) {
2373        pcc->l1_dcache_size = dcache_size;
2374    }
2375
2376    if (icache_size != -1) {
2377        pcc->l1_icache_size = icache_size;
2378    }
2379
2380#if defined(TARGET_PPC64)
2381    pcc->radix_page_info = kvm_get_radix_page_info();
2382
2383    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2384        /*
2385         * POWER9 DD1 has some bugs which make it not really ISA 3.00
2386         * compliant.  More importantly, advertising ISA 3.00
2387         * architected mode may prevent guests from activating
2388         * necessary DD1 workarounds.
2389         */
2390        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2391                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2392    }
2393#endif /* defined(TARGET_PPC64) */
2394}
2395
2396bool kvmppc_has_cap_epr(void)
2397{
2398    return cap_epr;
2399}
2400
2401bool kvmppc_has_cap_fixup_hcalls(void)
2402{
2403    return cap_fixup_hcalls;
2404}
2405
2406bool kvmppc_has_cap_htm(void)
2407{
2408    return cap_htm;
2409}
2410
2411bool kvmppc_has_cap_mmu_radix(void)
2412{
2413    return cap_mmu_radix;
2414}
2415
2416bool kvmppc_has_cap_mmu_hash_v3(void)
2417{
2418    return cap_mmu_hash_v3;
2419}
2420
2421static bool kvmppc_power8_host(void)
2422{
2423    bool ret = false;
2424#ifdef TARGET_PPC64
2425    {
2426        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2427        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2428              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2429              (base_pvr == CPU_POWERPC_POWER8_BASE);
2430    }
2431#endif /* TARGET_PPC64 */
2432    return ret;
2433}
2434
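    /*
     * parse_cap_ppc_safe_cache() and parse_cap_ppc_safe_bounds_check()
     * translate the bits returned by KVM_PPC_GET_CPU_CHAR into the spapr
     * capability convention: 0 (broken), 1 (workaround available) or
     * 2 (fixed in hardware).  The indirect-branch helper returns
     * SPAPR_CAP_* values directly.
     */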
2435static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2436{
2437    bool l1d_thread_priv_req = !kvmppc_power8_host();
2438
2439    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2440        return 2;
2441    } else if ((!l1d_thread_priv_req ||
2442                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2443               (c.character & c.character_mask
2444                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2445        return 1;
2446    }
2447
2448    return 0;
2449}
2450
2451static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2452{
2453    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2454        return 2;
2455    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2456        return 1;
2457    }
2458
2459    return 0;
2460}
2461
2462static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2463{
2464    if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
2465        (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
2466        (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
2467        return SPAPR_CAP_FIXED_NA;
2468    } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
2469        return SPAPR_CAP_WORKAROUND;
2470    } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2471        return  SPAPR_CAP_FIXED_CCD;
2472    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2473        return SPAPR_CAP_FIXED_IBS;
2474    }
2475
2476    return 0;
2477}
2478
2479static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
2480{
2481    if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
2482        return 1;
2483    }
2484    return 0;
2485}
2486
2487bool kvmppc_has_cap_xive(void)
2488{
2489    return cap_xive;
2490}
2491
2492static void kvmppc_get_cpu_characteristics(KVMState *s)
2493{
2494    struct kvm_ppc_cpu_char c;
2495    int ret;
2496
2497    /* Assume broken */
2498    cap_ppc_safe_cache = 0;
2499    cap_ppc_safe_bounds_check = 0;
2500    cap_ppc_safe_indirect_branch = 0;
2501
2502    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2503    if (!ret) {
2504        return;
2505    }
2506    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2507    if (ret < 0) {
2508        return;
2509    }
2510
2511    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2512    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2513    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2514    cap_ppc_count_cache_flush_assist =
2515        parse_cap_ppc_count_cache_flush_assist(c);
2516}
2517
2518int kvmppc_get_cap_safe_cache(void)
2519{
2520    return cap_ppc_safe_cache;
2521}
2522
2523int kvmppc_get_cap_safe_bounds_check(void)
2524{
2525    return cap_ppc_safe_bounds_check;
2526}
2527
2528int kvmppc_get_cap_safe_indirect_branch(void)
2529{
2530    return cap_ppc_safe_indirect_branch;
2531}
2532
2533int kvmppc_get_cap_count_cache_flush_assist(void)
2534{
2535    return cap_ppc_count_cache_flush_assist;
2536}
2537
2538bool kvmppc_has_cap_nested_kvm_hv(void)
2539{
2540    return !!cap_ppc_nested_kvm_hv;
2541}
2542
2543int kvmppc_set_cap_nested_kvm_hv(int enable)
2544{
2545    return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2546}
2547
2548bool kvmppc_has_cap_spapr_vfio(void)
2549{
2550    return cap_spapr_vfio;
2551}
2552
2553int kvmppc_get_cap_large_decr(void)
2554{
2555    return cap_large_decr;
2556}
2557
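    /*
     * Toggle the large decrementer (LPCR_LD) for this vCPU via the LPCR
     * one-reg, then read the LPCR back to check that the kernel actually
     * honoured the change; returns -1 if the bit could not be flipped.
     */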
2558int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
2559{
2560    CPUState *cs = CPU(cpu);
2561    uint64_t lpcr;
2562
2563    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2564    /* Do we need to modify the LPCR? */
2565    if (!!(lpcr & LPCR_LD) != !!enable) {
2566        if (enable) {
2567            lpcr |= LPCR_LD;
2568        } else {
2569            lpcr &= ~LPCR_LD;
2570        }
2571        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2572        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2573
2574        if (!!(lpcr & LPCR_LD) != !!enable) {
2575            return -1;
2576        }
2577    }
2578
2579    return 0;
2580}
2581
2582PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2583{
2584    uint32_t host_pvr = mfpvr();
2585    PowerPCCPUClass *pvr_pcc;
2586
2587    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2588    if (pvr_pcc == NULL) {
2589        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2590    }
2591
2592    return pvr_pcc;
2593}
2594
2595static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2596{
2597    TypeInfo type_info = {
2598        .name = TYPE_HOST_POWERPC_CPU,
2599        .class_init = kvmppc_host_cpu_class_init,
2600    };
2601    MachineClass *mc = MACHINE_GET_CLASS(ms);
2602    PowerPCCPUClass *pvr_pcc;
2603    ObjectClass *oc;
2604    DeviceClass *dc;
2605    int i;
2606
2607    pvr_pcc = kvm_ppc_get_host_cpu_class();
2608    if (pvr_pcc == NULL) {
2609        return -1;
2610    }
2611    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2612    type_register(&type_info);
2613    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2614        /* override TCG default cpu type with 'host' cpu model */
2615        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2616    }
2617
2618    oc = object_class_by_name(type_info.name);
2619    g_assert(oc);
2620
2621    /*
2622     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2623     * we want "POWER8" to be a "family" alias that points to the current
2624     * host CPU type, too)
2625     */
2626    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2627    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2628        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2629            char *suffix;
2630
2631            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2632            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2633            if (suffix) {
2634                *suffix = 0;
2635            }
2636            break;
2637        }
2638    }
2639
2640    return 0;
2641}
2642
2643int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2644{
2645    struct kvm_rtas_token_args args = {
2646        .token = token,
2647    };
2648
2649    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2650        return -ENOENT;
2651    }
2652
2653    strncpy(args.name, function, sizeof(args.name) - 1);
2654
2655    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2656}
2657
2658int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2659{
2660    struct kvm_get_htab_fd s = {
2661        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2662        .start_index = index,
2663    };
2664    int ret;
2665
2666    if (!cap_htab_fd) {
2667        error_setg(errp, "KVM version doesn't support %s the HPT",
2668                   write ? "writing" : "reading");
2669        return -ENOTSUP;
2670    }
2671
2672    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2673    if (ret < 0) {
2674        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2675                   write ? "writing" : "reading", write ? "to" : "from",
2676                   strerror(errno));
2677        return -errno;
2678    }
2679
2680    return ret;
2681}
2682
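    /*
     * Stream HPT contents from the KVM HTAB fd into the migration stream.
     * Each chunk read from the fd is a struct kvm_get_htab_header followed
     * by n_valid 16-byte HPTEs; it is forwarded as index, n_valid,
     * n_invalid and the valid HPTE data.  Returns 1 once the fd reports
     * EOF (HPT fully read), 0 if the max_ns time budget ran out first.
     */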
2683int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2684{
2685    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2686    uint8_t buf[bufsize];
2687    ssize_t rc;
2688
2689    do {
2690        rc = read(fd, buf, bufsize);
2691        if (rc < 0) {
2692            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2693                    strerror(errno));
2694            return rc;
2695        } else if (rc) {
2696            uint8_t *buffer = buf;
2697            ssize_t n = rc;
2698            while (n) {
2699                struct kvm_get_htab_header *head =
2700                    (struct kvm_get_htab_header *) buffer;
2701                size_t chunksize = sizeof(*head) +
2702                     HASH_PTE_SIZE_64 * head->n_valid;
2703
2704                qemu_put_be32(f, head->index);
2705                qemu_put_be16(f, head->n_valid);
2706                qemu_put_be16(f, head->n_invalid);
2707                qemu_put_buffer(f, (void *)(head + 1),
2708                                HASH_PTE_SIZE_64 * head->n_valid);
2709
2710                buffer += chunksize;
2711                n -= chunksize;
2712            }
2713        }
2714    } while ((rc != 0)
2715             && ((max_ns < 0) ||
2716                 ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2717
2718    return (rc == 0) ? 1 : 0;
2719}
2720
2721int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2722                           uint16_t n_valid, uint16_t n_invalid)
2723{
2724    struct kvm_get_htab_header *buf;
2725    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2726    ssize_t rc;
2727
2728    buf = alloca(chunksize);
2729    buf->index = index;
2730    buf->n_valid = n_valid;
2731    buf->n_invalid = n_invalid;
2732
2733    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2734
2735    rc = write(fd, buf, chunksize);
2736    if (rc < 0) {
2737        fprintf(stderr, "Error writing KVM hash table: %s\n",
2738                strerror(errno));
2739        return rc;
2740    }
2741    if (rc != chunksize) {
2742        /* We should never get a short write on a single chunk */
2743        fprintf(stderr, "Short write, restoring KVM hash table\n");
2744        return -1;
2745    }
2746    return 0;
2747}
2748
2749bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2750{
2751    return true;
2752}
2753
2754void kvm_arch_init_irq_routing(KVMState *s)
2755{
2756}
2757
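    /*
     * Read n HPTEs starting at index ptex through the KVM HTAB fd.  Runs
     * that the kernel reports as invalid are zero-filled in the
     * destination array, so callers always receive n contiguous entries.
     */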
2758void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2759{
2760    int fd, rc;
2761    int i;
2762
2763    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2764
2765    i = 0;
2766    while (i < n) {
2767        struct kvm_get_htab_header *hdr;
2768        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2769        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2770
2771        rc = read(fd, buf, sizeof(buf));
2772        if (rc < 0) {
2773            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2774        }
2775
2776        hdr = (struct kvm_get_htab_header *)buf;
2777        while ((i < n) && ((char *)hdr < (buf + rc))) {
2778            int invalid = hdr->n_invalid, valid = hdr->n_valid;
2779
2780            if (hdr->index != (ptex + i)) {
2781                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2782                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2783            }
2784
2785            if (n - i < valid) {
2786                valid = n - i;
2787            }
2788            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2789            i += valid;
2790
2791            if ((n - i) < invalid) {
2792                invalid = n - i;
2793            }
2794            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2795            i += invalid;
2796
2797            hdr = (struct kvm_get_htab_header *)
2798                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2799        }
2800    }
2801
2802    close(fd);
2803}
2804
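    /*
     * Update a single HPTE through the HTAB fd write interface: a header
     * with n_valid = 1 and n_invalid = 0 followed by the two big-endian
     * PTE doublewords.
     */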
2805void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2806{
2807    int fd, rc;
2808    struct {
2809        struct kvm_get_htab_header hdr;
2810        uint64_t pte0;
2811        uint64_t pte1;
2812    } buf;
2813
2814    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2815
2816    buf.hdr.n_valid = 1;
2817    buf.hdr.n_invalid = 0;
2818    buf.hdr.index = ptex;
2819    buf.pte0 = cpu_to_be64(pte0);
2820    buf.pte1 = cpu_to_be64(pte1);
2821
2822    rc = write(fd, &buf, sizeof(buf));
2823    if (rc != sizeof(buf)) {
2824        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2825    }
2826    close(fd);
2827}
2828
2829int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2830                             uint64_t address, uint32_t data, PCIDevice *dev)
2831{
2832    return 0;
2833}
2834
2835int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2836                                int vector, PCIDevice *dev)
2837{
2838    return 0;
2839}
2840
2841int kvm_arch_release_virq_post(int virq)
2842{
2843    return 0;
2844}
2845
2846int kvm_arch_msi_data_to_gsi(uint32_t data)
2847{
2848    return data & 0xffff;
2849}
2850
2851int kvmppc_enable_hwrng(void)
2852{
2853    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2854        return -1;
2855    }
2856
2857    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2858}
2859
2860void kvmppc_check_papr_resize_hpt(Error **errp)
2861{
2862    if (!kvm_enabled()) {
2863        return; /* No KVM, we're good */
2864    }
2865
2866    if (cap_resize_hpt) {
2867        return; /* Kernel has explicit support, we're good */
2868    }
2869
2870    /* Otherwise fallback on looking for PR KVM */
2871    if (kvmppc_is_pr(kvm_state)) {
2872        return;
2873    }
2874
2875    error_setg(errp,
2876               "Hash page table resizing not available with this KVM version");
2877}
2878
2879int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2880{
2881    CPUState *cs = CPU(cpu);
2882    struct kvm_ppc_resize_hpt rhpt = {
2883        .flags = flags,
2884        .shift = shift,
2885    };
2886
2887    if (!cap_resize_hpt) {
2888        return -ENOSYS;
2889    }
2890
2891    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2892}
2893
2894int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2895{
2896    CPUState *cs = CPU(cpu);
2897    struct kvm_ppc_resize_hpt rhpt = {
2898        .flags = flags,
2899        .shift = shift,
2900    };
2901
2902    if (!cap_resize_hpt) {
2903        return -ENOSYS;
2904    }
2905
2906    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2907}
2908
2909/*
2910 * This is a helper function to detect a post-migration scenario in
2911 * which a guest running under KVM-HV freezes in cpu_post_load because
2912 * the guest kernel can't handle a PVR value other than the actual host
2913 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2914 *
2915 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2916 * (so we're HV), return true.  The workaround itself is done in
2917 * cpu_post_load.
2918 *
2919 * The order here is important: we only fall back to checking for KVM
2920 * PR if the guest kernel can't handle the situation itself, because
2921 * we want to avoid querying the running KVM type at the QEMU level as
2922 * much as possible.
2923 */
2924bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2925{
2926    CPUState *cs = CPU(cpu);
2927
2928    if (!kvm_enabled()) {
2929        return false;
2930    }
2931
2932    if (cap_ppc_pvr_compat) {
2933        return false;
2934    }
2935
2936    return !kvmppc_is_pr(cs->kvm_state);
2937}
2938
2939void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2940{
2941    CPUState *cs = CPU(cpu);
2942
2943    if (kvm_enabled()) {
2944        kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2945    }
2946}
2947
2948void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
2949{
2950    CPUState *cs = CPU(cpu);
2951
2952    if (kvm_enabled()) {
2953        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &tb_offset);
2954    }
2955}
2956