qemu/target-ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include <dirent.h>
  18#include <sys/types.h>
  19#include <sys/ioctl.h>
  20#include <sys/mman.h>
  21#include <sys/vfs.h>
  22
  23#include <linux/kvm.h>
  24
  25#include "qemu-common.h"
  26#include "qemu/timer.h"
  27#include "sysemu/sysemu.h"
  28#include "sysemu/kvm.h"
  29#include "kvm_ppc.h"
  30#include "cpu.h"
  31#include "sysemu/cpus.h"
  32#include "sysemu/device_tree.h"
  33#include "mmu-hash64.h"
  34
  35#include "hw/sysbus.h"
  36#include "hw/ppc/spapr.h"
  37#include "hw/ppc/spapr_vio.h"
  38#include "sysemu/watchdog.h"
  39#include "trace.h"
  40
  41//#define DEBUG_KVM
  42
  43#ifdef DEBUG_KVM
  44#define DPRINTF(fmt, ...) \
  45    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  46#else
  47#define DPRINTF(fmt, ...) \
  48    do { } while (0)
  49#endif
  50
  51#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  52
  53const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  54    KVM_CAP_LAST_INFO
  55};
  56
  57static int cap_interrupt_unset = false;
  58static int cap_interrupt_level = false;
  59static int cap_segstate;
  60static int cap_booke_sregs;
  61static int cap_ppc_smt;
  62static int cap_ppc_rma;
  63static int cap_spapr_tce;
  64static int cap_hior;
  65static int cap_one_reg;
  66static int cap_epr;
  67static int cap_ppc_watchdog;
  68static int cap_papr;
  69static int cap_htab_fd;
  70
  71/* XXX We have a race condition where we actually have a level triggered
  72 *     interrupt, but the infrastructure can't expose that yet, so the guest
   73 *     takes the interrupt but ignores it, goes to sleep and never gets
   74 *     notified that there's still an interrupt pending.
  75 *
  76 *     As a quick workaround, let's just wake up again 20 ms after we injected
   77 * an interrupt. That way we can ensure that we're always reinjecting
  78 *     interrupts in case the guest swallowed them.
  79 */
  80static QEMUTimer *idle_timer;
  81
  82static void kvm_kick_cpu(void *opaque)
  83{
  84    PowerPCCPU *cpu = opaque;
  85
  86    qemu_cpu_kick(CPU(cpu));
  87}
  88
  89static int kvm_ppc_register_host_cpu_type(void);
  90
  91int kvm_arch_init(KVMState *s)
  92{
  93    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  94    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  95    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  96    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
  97    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
  98    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
  99    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 100    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 101    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 102    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 103    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 104    /* Note: we don't set cap_papr here, because this capability is
 105     * only activated after this by kvmppc_set_papr() */
 106    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 107
 108    if (!cap_interrupt_level) {
 109        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 110                        "VM to stall at times!\n");
 111    }
 112
 113    kvm_ppc_register_host_cpu_type();
 114
 115    return 0;
 116}
 117
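/* Push the guest PVR to KVM.  On BookE the host's native PVR is always
 * used, so this is a no-op there; on Book3S it requires the SREGS
 * capability and goes through KVM_GET_SREGS/KVM_SET_SREGS. */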
 118static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 119{
 120    CPUPPCState *cenv = &cpu->env;
 121    CPUState *cs = CPU(cpu);
 122    struct kvm_sregs sregs;
 123    int ret;
 124
 125    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 126        /* What we're really trying to say is "if we're on BookE, we use
 127           the native PVR for now". This is the only sane way to check
  128           it though, so we potentially confuse users into thinking they can
  129           run BookE guests on BookS. Let's hope nobody dares enough :) */
 130        return 0;
 131    } else {
 132        if (!cap_segstate) {
 133            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 134            return -ENOSYS;
 135        }
 136    }
 137
 138    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 139    if (ret) {
 140        return ret;
 141    }
 142
 143    sregs.pvr = cenv->spr[SPR_PVR];
 144    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 145}
 146
 147/* Set up a shared TLB array with KVM */
 148static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 149{
 150    CPUPPCState *env = &cpu->env;
 151    CPUState *cs = CPU(cpu);
 152    struct kvm_book3e_206_tlb_params params = {};
 153    struct kvm_config_tlb cfg = {};
 154    struct kvm_enable_cap encap = {};
 155    unsigned int entries = 0;
 156    int ret, i;
 157
 158    if (!kvm_enabled() ||
 159        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 160        return 0;
 161    }
 162
 163    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 164
 165    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 166        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 167        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 168        entries += params.tlb_sizes[i];
 169    }
 170
 171    assert(entries == env->nb_tlb);
 172    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 173
 174    env->tlb_dirty = true;
 175
 176    cfg.array = (uintptr_t)env->tlb.tlbm;
 177    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 178    cfg.params = (uintptr_t)&params;
 179    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 180
 181    encap.cap = KVM_CAP_SW_TLB;
 182    encap.args[0] = (uintptr_t)&cfg;
 183
 184    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
 185    if (ret < 0) {
 186        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 187                __func__, strerror(-ret));
 188        return ret;
 189    }
 190
 191    env->kvm_sw_tlb = true;
 192    return 0;
 193}
 194
 195
 196#if defined(TARGET_PPC64)
 197static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 198                                       struct kvm_ppc_smmu_info *info)
 199{
 200    CPUPPCState *env = &cpu->env;
 201    CPUState *cs = CPU(cpu);
 202
 203    memset(info, 0, sizeof(*info));
 204
   205    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
 206     * need to "guess" what the supported page sizes are.
 207     *
 208     * For that to work we make a few assumptions:
 209     *
 210     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 211     *   KVM which only supports 4K and 16M pages, but supports them
   212 *   regardless of the backing store characteristics. We also don't
 213     *   support 1T segments.
 214     *
   215 *   This is safe because if HV KVM ever supports that capability or PR
   216 *   KVM grows support for more page/segment sizes, those versions
 217     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 218     *   will not hit this fallback
 219     *
 220     * - Else we are running HV KVM. This means we only support page
 221     *   sizes that fit in the backing store. Additionally we only
   222 *   advertise 64K pages if the processor is ARCH 2.06 and we assume
 223     *   P7 encodings for the SLB and hash table. Here too, we assume
 224     *   support for any newer processor will mean a kernel that
 225     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 226     *   this fallback.
 227     */
 228    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 229        /* No flags */
 230        info->flags = 0;
 231        info->slb_size = 64;
 232
 233        /* Standard 4k base page size segment */
 234        info->sps[0].page_shift = 12;
 235        info->sps[0].slb_enc = 0;
 236        info->sps[0].enc[0].page_shift = 12;
 237        info->sps[0].enc[0].pte_enc = 0;
 238
 239        /* Standard 16M large page size segment */
 240        info->sps[1].page_shift = 24;
 241        info->sps[1].slb_enc = SLB_VSID_L;
 242        info->sps[1].enc[0].page_shift = 24;
 243        info->sps[1].enc[0].pte_enc = 0;
 244    } else {
 245        int i = 0;
 246
 247        /* HV KVM has backing store size restrictions */
 248        info->flags = KVM_PPC_PAGE_SIZES_REAL;
 249
 250        if (env->mmu_model & POWERPC_MMU_1TSEG) {
 251            info->flags |= KVM_PPC_1T_SEGMENTS;
 252        }
 253
 254        if (env->mmu_model == POWERPC_MMU_2_06) {
 255            info->slb_size = 32;
 256        } else {
 257            info->slb_size = 64;
 258        }
 259
 260        /* Standard 4k base page size segment */
 261        info->sps[i].page_shift = 12;
 262        info->sps[i].slb_enc = 0;
 263        info->sps[i].enc[0].page_shift = 12;
 264        info->sps[i].enc[0].pte_enc = 0;
 265        i++;
 266
 267        /* 64K on MMU 2.06 */
 268        if (env->mmu_model == POWERPC_MMU_2_06) {
 269            info->sps[i].page_shift = 16;
 270            info->sps[i].slb_enc = 0x110;
 271            info->sps[i].enc[0].page_shift = 16;
 272            info->sps[i].enc[0].pte_enc = 1;
 273            i++;
 274        }
 275
 276        /* Standard 16M large page size segment */
 277        info->sps[i].page_shift = 24;
 278        info->sps[i].slb_enc = SLB_VSID_L;
 279        info->sps[i].enc[0].page_shift = 24;
 280        info->sps[i].enc[0].pte_enc = 0;
 281    }
 282}
 283
 284static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 285{
 286    CPUState *cs = CPU(cpu);
 287    int ret;
 288
 289    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 290        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 291        if (ret == 0) {
 292            return;
 293        }
 294    }
 295
 296    kvm_get_fallback_smmu_info(cpu, info);
 297}
 298
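/* Return the page size backing guest RAM: the normal host page size for
 * anonymous memory, or the hugepage size when mem_path (-mem-path) points
 * at a hugetlbfs mount. */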
 299static long getrampagesize(void)
 300{
 301    struct statfs fs;
 302    int ret;
 303
 304    if (!mem_path) {
 305        /* guest RAM is backed by normal anonymous pages */
 306        return getpagesize();
 307    }
 308
 309    do {
 310        ret = statfs(mem_path, &fs);
 311    } while (ret != 0 && errno == EINTR);
 312
 313    if (ret != 0) {
 314        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 315                strerror(errno));
 316        exit(1);
 317    }
 318
 319#define HUGETLBFS_MAGIC       0x958458f6
 320
 321    if (fs.f_type != HUGETLBFS_MAGIC) {
 322        /* Explicit mempath, but it's ordinary pages */
 323        return getpagesize();
 324    }
 325
  326    /* It's hugetlbfs, so return the huge page size */
 327    return fs.f_bsize;
 328}
 329
 330static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 331{
 332    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 333        return true;
 334    }
 335
 336    return (1ul << shift) <= rampgsize;
 337}
 338
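/* Filter the segment/page sizes reported by the kernel against what the
 * RAM backing store can actually provide, store the result in env->sps,
 * and adjust slb_nr and the 1T segment flag to match.  Only 64-bit server
 * MMUs are handled; everything else is left untouched. */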
 339static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 340{
 341    static struct kvm_ppc_smmu_info smmu_info;
 342    static bool has_smmu_info;
 343    CPUPPCState *env = &cpu->env;
 344    long rampagesize;
 345    int iq, ik, jq, jk;
 346
 347    /* We only handle page sizes for 64-bit server guests for now */
 348    if (!(env->mmu_model & POWERPC_MMU_64)) {
 349        return;
 350    }
 351
 352    /* Collect MMU info from kernel if not already */
 353    if (!has_smmu_info) {
 354        kvm_get_smmu_info(cpu, &smmu_info);
 355        has_smmu_info = true;
 356    }
 357
 358    rampagesize = getrampagesize();
 359
 360    /* Convert to QEMU form */
 361    memset(&env->sps, 0, sizeof(env->sps));
 362
 363    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 364        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 365        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 366
 367        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 368                                 ksps->page_shift)) {
 369            continue;
 370        }
 371        qsps->page_shift = ksps->page_shift;
 372        qsps->slb_enc = ksps->slb_enc;
 373        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 374            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 375                                     ksps->enc[jk].page_shift)) {
 376                continue;
 377            }
 378            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 379            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 380            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 381                break;
 382            }
 383        }
 384        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 385            break;
 386        }
 387    }
 388    env->slb_nr = smmu_info.slb_size;
 389    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
 390        env->mmu_model |= POWERPC_MMU_1TSEG;
 391    } else {
 392        env->mmu_model &= ~POWERPC_MMU_1TSEG;
 393    }
 394}
 395#else /* defined (TARGET_PPC64) */
 396
 397static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 398{
 399}
 400
 401#endif /* !defined (TARGET_PPC64) */
 402
 403unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 404{
 405    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 406}
 407
 408int kvm_arch_init_vcpu(CPUState *cs)
 409{
 410    PowerPCCPU *cpu = POWERPC_CPU(cs);
 411    CPUPPCState *cenv = &cpu->env;
 412    int ret;
 413
 414    /* Gather server mmu info from KVM and update the CPU state */
 415    kvm_fixup_page_sizes(cpu);
 416
 417    /* Synchronize sregs with kvm */
 418    ret = kvm_arch_sync_sregs(cpu);
 419    if (ret) {
 420        return ret;
 421    }
 422
 423    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 424
 425    /* Some targets support access to KVM's guest TLB. */
 426    switch (cenv->mmu_model) {
 427    case POWERPC_MMU_BOOKE206:
 428        ret = kvm_booke206_tlb_init(cpu);
 429        break;
 430    default:
 431        break;
 432    }
 433
 434    return ret;
 435}
 436
 437void kvm_arch_reset_vcpu(CPUState *cpu)
 438{
 439}
 440
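/* Flush the complete shadow TLB back into KVM by marking every entry
 * dirty in the KVM_DIRTY_TLB bitmap. */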
 441static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 442{
 443    CPUPPCState *env = &cpu->env;
 444    CPUState *cs = CPU(cpu);
 445    struct kvm_dirty_tlb dirty_tlb;
 446    unsigned char *bitmap;
 447    int ret;
 448
 449    if (!env->kvm_sw_tlb) {
 450        return;
 451    }
 452
 453    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 454    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 455
 456    dirty_tlb.bitmap = (uintptr_t)bitmap;
 457    dirty_tlb.num_dirty = env->nb_tlb;
 458
 459    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 460    if (ret) {
 461        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 462                __func__, strerror(-ret));
 463    }
 464
 465    g_free(bitmap);
 466}
 467
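/* kvm_get_one_spr()/kvm_put_one_spr() transfer a single SPR through the
 * ONE_REG interface, taking the access width from the KVM_REG_SIZE bits
 * of the register id.  Callers pass the KVM id and the QEMU SPR number,
 * e.g. kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR). */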
 468static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 469{
 470    PowerPCCPU *cpu = POWERPC_CPU(cs);
 471    CPUPPCState *env = &cpu->env;
 472    union {
 473        uint32_t u32;
 474        uint64_t u64;
 475    } val;
 476    struct kvm_one_reg reg = {
 477        .id = id,
 478        .addr = (uintptr_t) &val,
 479    };
 480    int ret;
 481
 482    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 483    if (ret != 0) {
 484        trace_kvm_failed_spr_get(spr, strerror(errno));
 485    } else {
 486        switch (id & KVM_REG_SIZE_MASK) {
 487        case KVM_REG_SIZE_U32:
 488            env->spr[spr] = val.u32;
 489            break;
 490
 491        case KVM_REG_SIZE_U64:
 492            env->spr[spr] = val.u64;
 493            break;
 494
 495        default:
 496            /* Don't handle this size yet */
 497            abort();
 498        }
 499    }
 500}
 501
 502static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 503{
 504    PowerPCCPU *cpu = POWERPC_CPU(cs);
 505    CPUPPCState *env = &cpu->env;
 506    union {
 507        uint32_t u32;
 508        uint64_t u64;
 509    } val;
 510    struct kvm_one_reg reg = {
 511        .id = id,
 512        .addr = (uintptr_t) &val,
 513    };
 514    int ret;
 515
 516    switch (id & KVM_REG_SIZE_MASK) {
 517    case KVM_REG_SIZE_U32:
 518        val.u32 = env->spr[spr];
 519        break;
 520
 521    case KVM_REG_SIZE_U64:
 522        val.u64 = env->spr[spr];
 523        break;
 524
 525    default:
 526        /* Don't handle this size yet */
 527        abort();
 528    }
 529
 530    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 531    if (ret != 0) {
 532        trace_kvm_failed_spr_set(spr, strerror(errno));
 533    }
 534}
 535
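/* kvm_put_fp()/kvm_get_fp() sync the floating point, VSX and AltiVec
 * state to/from KVM.  When VSX is available each register is transferred
 * as a full two-doubleword VSR (FPR plus VSX extension); otherwise only
 * the FPRs are moved. */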
 536static int kvm_put_fp(CPUState *cs)
 537{
 538    PowerPCCPU *cpu = POWERPC_CPU(cs);
 539    CPUPPCState *env = &cpu->env;
 540    struct kvm_one_reg reg;
 541    int i;
 542    int ret;
 543
 544    if (env->insns_flags & PPC_FLOAT) {
 545        uint64_t fpscr = env->fpscr;
 546        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 547
 548        reg.id = KVM_REG_PPC_FPSCR;
 549        reg.addr = (uintptr_t)&fpscr;
 550        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 551        if (ret < 0) {
 552            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 553            return ret;
 554        }
 555
 556        for (i = 0; i < 32; i++) {
 557            uint64_t vsr[2];
 558
 559            vsr[0] = float64_val(env->fpr[i]);
 560            vsr[1] = env->vsr[i];
 561            reg.addr = (uintptr_t) &vsr;
 562            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 563
 564            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 565            if (ret < 0) {
 566                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 567                        i, strerror(errno));
 568                return ret;
 569            }
 570        }
 571    }
 572
 573    if (env->insns_flags & PPC_ALTIVEC) {
 574        reg.id = KVM_REG_PPC_VSCR;
 575        reg.addr = (uintptr_t)&env->vscr;
 576        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 577        if (ret < 0) {
 578            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 579            return ret;
 580        }
 581
 582        for (i = 0; i < 32; i++) {
 583            reg.id = KVM_REG_PPC_VR(i);
 584            reg.addr = (uintptr_t)&env->avr[i];
 585            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 586            if (ret < 0) {
 587                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 588                return ret;
 589            }
 590        }
 591    }
 592
 593    return 0;
 594}
 595
 596static int kvm_get_fp(CPUState *cs)
 597{
 598    PowerPCCPU *cpu = POWERPC_CPU(cs);
 599    CPUPPCState *env = &cpu->env;
 600    struct kvm_one_reg reg;
 601    int i;
 602    int ret;
 603
 604    if (env->insns_flags & PPC_FLOAT) {
 605        uint64_t fpscr;
 606        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 607
 608        reg.id = KVM_REG_PPC_FPSCR;
 609        reg.addr = (uintptr_t)&fpscr;
 610        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 611        if (ret < 0) {
 612            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 613            return ret;
 614        } else {
 615            env->fpscr = fpscr;
 616        }
 617
 618        for (i = 0; i < 32; i++) {
 619            uint64_t vsr[2];
 620
 621            reg.addr = (uintptr_t) &vsr;
 622            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 623
 624            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 625            if (ret < 0) {
 626                DPRINTF("Unable to get %s%d from KVM: %s\n",
 627                        vsx ? "VSR" : "FPR", i, strerror(errno));
 628                return ret;
 629            } else {
 630                env->fpr[i] = vsr[0];
 631                if (vsx) {
 632                    env->vsr[i] = vsr[1];
 633                }
 634            }
 635        }
 636    }
 637
 638    if (env->insns_flags & PPC_ALTIVEC) {
 639        reg.id = KVM_REG_PPC_VSCR;
 640        reg.addr = (uintptr_t)&env->vscr;
 641        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 642        if (ret < 0) {
 643            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 644            return ret;
 645        }
 646
 647        for (i = 0; i < 32; i++) {
 648            reg.id = KVM_REG_PPC_VR(i);
 649            reg.addr = (uintptr_t)&env->avr[i];
 650            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 651            if (ret < 0) {
 652                DPRINTF("Unable to get VR%d from KVM: %s\n",
 653                        i, strerror(errno));
 654                return ret;
 655            }
 656        }
 657    }
 658
 659    return 0;
 660}
 661
 662#if defined(TARGET_PPC64)
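/* Fetch the PAPR Virtual Processor Area registration state (VPA address,
 * SLB shadow buffer and dispatch trace log) from KVM via ONE_REG. */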
 663static int kvm_get_vpa(CPUState *cs)
 664{
 665    PowerPCCPU *cpu = POWERPC_CPU(cs);
 666    CPUPPCState *env = &cpu->env;
 667    struct kvm_one_reg reg;
 668    int ret;
 669
 670    reg.id = KVM_REG_PPC_VPA_ADDR;
 671    reg.addr = (uintptr_t)&env->vpa_addr;
 672    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 673    if (ret < 0) {
 674        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 675        return ret;
 676    }
 677
 678    assert((uintptr_t)&env->slb_shadow_size
 679           == ((uintptr_t)&env->slb_shadow_addr + 8));
 680    reg.id = KVM_REG_PPC_VPA_SLB;
 681    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 682    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 683    if (ret < 0) {
 684        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 685                strerror(errno));
 686        return ret;
 687    }
 688
 689    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 690    reg.id = KVM_REG_PPC_VPA_DTL;
 691    reg.addr = (uintptr_t)&env->dtl_addr;
 692    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 693    if (ret < 0) {
 694        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 695                strerror(errno));
 696        return ret;
 697    }
 698
 699    return 0;
 700}
 701
 702static int kvm_put_vpa(CPUState *cs)
 703{
 704    PowerPCCPU *cpu = POWERPC_CPU(cs);
 705    CPUPPCState *env = &cpu->env;
 706    struct kvm_one_reg reg;
 707    int ret;
 708
 709    /* SLB shadow or DTL can't be registered unless a master VPA is
 710     * registered.  That means when restoring state, if a VPA *is*
 711     * registered, we need to set that up first.  If not, we need to
 712     * deregister the others before deregistering the master VPA */
 713    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 714
 715    if (env->vpa_addr) {
 716        reg.id = KVM_REG_PPC_VPA_ADDR;
 717        reg.addr = (uintptr_t)&env->vpa_addr;
 718        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 719        if (ret < 0) {
 720            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 721            return ret;
 722        }
 723    }
 724
 725    assert((uintptr_t)&env->slb_shadow_size
 726           == ((uintptr_t)&env->slb_shadow_addr + 8));
 727    reg.id = KVM_REG_PPC_VPA_SLB;
 728    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 729    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 730    if (ret < 0) {
 731        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 732        return ret;
 733    }
 734
 735    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 736    reg.id = KVM_REG_PPC_VPA_DTL;
 737    reg.addr = (uintptr_t)&env->dtl_addr;
 738    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 739    if (ret < 0) {
 740        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 741                strerror(errno));
 742        return ret;
 743    }
 744
 745    if (!env->vpa_addr) {
 746        reg.id = KVM_REG_PPC_VPA_ADDR;
 747        reg.addr = (uintptr_t)&env->vpa_addr;
 748        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 749        if (ret < 0) {
 750            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 751            return ret;
 752        }
 753    }
 754
 755    return 0;
 756}
 757#endif /* TARGET_PPC64 */
 758
 759int kvm_arch_put_registers(CPUState *cs, int level)
 760{
 761    PowerPCCPU *cpu = POWERPC_CPU(cs);
 762    CPUPPCState *env = &cpu->env;
 763    struct kvm_regs regs;
 764    int ret;
 765    int i;
 766
 767    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 768    if (ret < 0) {
 769        return ret;
 770    }
 771
 772    regs.ctr = env->ctr;
 773    regs.lr  = env->lr;
 774    regs.xer = cpu_read_xer(env);
 775    regs.msr = env->msr;
 776    regs.pc = env->nip;
 777
 778    regs.srr0 = env->spr[SPR_SRR0];
 779    regs.srr1 = env->spr[SPR_SRR1];
 780
 781    regs.sprg0 = env->spr[SPR_SPRG0];
 782    regs.sprg1 = env->spr[SPR_SPRG1];
 783    regs.sprg2 = env->spr[SPR_SPRG2];
 784    regs.sprg3 = env->spr[SPR_SPRG3];
 785    regs.sprg4 = env->spr[SPR_SPRG4];
 786    regs.sprg5 = env->spr[SPR_SPRG5];
 787    regs.sprg6 = env->spr[SPR_SPRG6];
 788    regs.sprg7 = env->spr[SPR_SPRG7];
 789
 790    regs.pid = env->spr[SPR_BOOKE_PID];
 791
  792    for (i = 0; i < 32; i++)
 793        regs.gpr[i] = env->gpr[i];
 794
 795    regs.cr = 0;
 796    for (i = 0; i < 8; i++) {
 797        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 798    }
 799
 800    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 801    if (ret < 0)
 802        return ret;
 803
 804    kvm_put_fp(cs);
 805
 806    if (env->tlb_dirty) {
 807        kvm_sw_tlb_put(cpu);
 808        env->tlb_dirty = false;
 809    }
 810
 811    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 812        struct kvm_sregs sregs;
 813
 814        sregs.pvr = env->spr[SPR_PVR];
 815
 816        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 817
 818        /* Sync SLB */
 819#ifdef TARGET_PPC64
 820        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 821            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 822            if (env->slb[i].esid & SLB_ESID_V) {
 823                sregs.u.s.ppc64.slb[i].slbe |= i;
 824            }
 825            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 826        }
 827#endif
 828
 829        /* Sync SRs */
 830        for (i = 0; i < 16; i++) {
 831            sregs.u.s.ppc32.sr[i] = env->sr[i];
 832        }
 833
 834        /* Sync BATs */
 835        for (i = 0; i < 8; i++) {
 836            /* Beware. We have to swap upper and lower bits here */
 837            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 838                | env->DBAT[1][i];
 839            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 840                | env->IBAT[1][i];
 841        }
 842
 843        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 844        if (ret) {
 845            return ret;
 846        }
 847    }
 848
 849    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 850        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 851    }
 852
 853    if (cap_one_reg) {
 854        int i;
 855
 856        /* We deliberately ignore errors here, for kernels which have
 857         * the ONE_REG calls, but don't support the specific
 858         * registers, there's a reasonable chance things will still
 859         * work, at least until we try to migrate. */
 860        for (i = 0; i < 1024; i++) {
 861            uint64_t id = env->spr_cb[i].one_reg_id;
 862
 863            if (id != 0) {
 864                kvm_put_one_spr(cs, id, i);
 865            }
 866        }
 867
 868#ifdef TARGET_PPC64
 869        if (cap_papr) {
 870            if (kvm_put_vpa(cs) < 0) {
 871                DPRINTF("Warning: Unable to set VPA information to KVM\n");
 872            }
 873        }
 874#endif /* TARGET_PPC64 */
 875    }
 876
 877    return ret;
 878}
 879
 880int kvm_arch_get_registers(CPUState *cs)
 881{
 882    PowerPCCPU *cpu = POWERPC_CPU(cs);
 883    CPUPPCState *env = &cpu->env;
 884    struct kvm_regs regs;
 885    struct kvm_sregs sregs;
 886    uint32_t cr;
 887    int i, ret;
 888
 889    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 890    if (ret < 0)
 891        return ret;
 892
 893    cr = regs.cr;
 894    for (i = 7; i >= 0; i--) {
 895        env->crf[i] = cr & 15;
 896        cr >>= 4;
 897    }
 898
 899    env->ctr = regs.ctr;
 900    env->lr = regs.lr;
 901    cpu_write_xer(env, regs.xer);
 902    env->msr = regs.msr;
 903    env->nip = regs.pc;
 904
 905    env->spr[SPR_SRR0] = regs.srr0;
 906    env->spr[SPR_SRR1] = regs.srr1;
 907
 908    env->spr[SPR_SPRG0] = regs.sprg0;
 909    env->spr[SPR_SPRG1] = regs.sprg1;
 910    env->spr[SPR_SPRG2] = regs.sprg2;
 911    env->spr[SPR_SPRG3] = regs.sprg3;
 912    env->spr[SPR_SPRG4] = regs.sprg4;
 913    env->spr[SPR_SPRG5] = regs.sprg5;
 914    env->spr[SPR_SPRG6] = regs.sprg6;
 915    env->spr[SPR_SPRG7] = regs.sprg7;
 916
 917    env->spr[SPR_BOOKE_PID] = regs.pid;
 918
  919    for (i = 0; i < 32; i++)
 920        env->gpr[i] = regs.gpr[i];
 921
 922    kvm_get_fp(cs);
 923
 924    if (cap_booke_sregs) {
 925        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 926        if (ret < 0) {
 927            return ret;
 928        }
 929
 930        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 931            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 932            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 933            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 934            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 935            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 936            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 937            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 938            env->spr[SPR_DECR] = sregs.u.e.dec;
 939            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 940            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 941            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 942        }
 943
 944        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 945            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 946            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 947            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 948            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 949            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 950        }
 951
 952        if (sregs.u.e.features & KVM_SREGS_E_64) {
 953            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 954        }
 955
 956        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 957            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 958        }
 959
 960        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 961            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 962            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 963            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 964            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 965            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 966            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 967            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 968            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 969            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 970            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 971            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 972            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 973            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 974            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 975            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 976            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 977
 978            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 979                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
 980                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
 981                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
 982            }
 983
 984            if (sregs.u.e.features & KVM_SREGS_E_PM) {
 985                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
 986            }
 987
 988            if (sregs.u.e.features & KVM_SREGS_E_PC) {
 989                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
 990                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
 991            }
 992        }
 993
 994        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
 995            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
 996            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
 997            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
 998            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
 999            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1000            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1001            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1002            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1003            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1004            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1005        }
1006
1007        if (sregs.u.e.features & KVM_SREGS_EXP) {
1008            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1009        }
1010
1011        if (sregs.u.e.features & KVM_SREGS_E_PD) {
1012            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1013            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1014        }
1015
1016        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1017            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1018            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1019            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1020
1021            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1022                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1023                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1024            }
1025        }
1026    }
1027
1028    if (cap_segstate) {
1029        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1030        if (ret < 0) {
1031            return ret;
1032        }
1033
1034        if (!env->external_htab) {
1035            ppc_store_sdr1(env, sregs.u.s.sdr1);
1036        }
1037
1038        /* Sync SLB */
1039#ifdef TARGET_PPC64
1040        /*
1041         * The packed SLB array we get from KVM_GET_SREGS only contains
1042         * information about valid entries. So we flush our internal
1043         * copy to get rid of stale ones, then put all valid SLB entries
1044         * back in.
1045         */
1046        memset(env->slb, 0, sizeof(env->slb));
1047        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1048            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1049            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1050            /*
1051             * Only restore valid entries
1052             */
1053            if (rb & SLB_ESID_V) {
1054                ppc_store_slb(env, rb, rs);
1055            }
1056        }
1057#endif
1058
1059        /* Sync SRs */
1060        for (i = 0; i < 16; i++) {
1061            env->sr[i] = sregs.u.s.ppc32.sr[i];
1062        }
1063
1064        /* Sync BATs */
1065        for (i = 0; i < 8; i++) {
1066            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1067            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1068            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1069            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1070        }
1071    }
1072
1073    if (cap_hior) {
1074        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1075    }
1076
1077    if (cap_one_reg) {
1078        int i;
1079
1080        /* We deliberately ignore errors here, for kernels which have
1081         * the ONE_REG calls, but don't support the specific
1082         * registers, there's a reasonable chance things will still
1083         * work, at least until we try to migrate. */
1084        for (i = 0; i < 1024; i++) {
1085            uint64_t id = env->spr_cb[i].one_reg_id;
1086
1087            if (id != 0) {
1088                kvm_get_one_spr(cs, id, i);
1089            }
1090        }
1091
1092#ifdef TARGET_PPC64
1093        if (cap_papr) {
1094            if (kvm_get_vpa(cs) < 0) {
1095                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1096            }
1097        }
1098#endif
1099    }
1100
1101    return 0;
1102}
1103
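/* Assert or deassert the external interrupt pin of a vcpu using the
 * level-triggered KVM_INTERRUPT interface; silently does nothing if the
 * irq isn't the external pin or the required capabilities are missing. */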
1104int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1105{
1106    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1107
1108    if (irq != PPC_INTERRUPT_EXT) {
1109        return 0;
1110    }
1111
1112    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1113        return 0;
1114    }
1115
1116    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1117
1118    return 0;
1119}
1120
1121#if defined(TARGET_PPCEMB)
1122#define PPC_INPUT_INT PPC40x_INPUT_INT
1123#elif defined(TARGET_PPC64)
1124#define PPC_INPUT_INT PPC970_INPUT_INT
1125#else
1126#define PPC_INPUT_INT PPC6xx_INPUT_INT
1127#endif
1128
1129void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1130{
1131    PowerPCCPU *cpu = POWERPC_CPU(cs);
1132    CPUPPCState *env = &cpu->env;
1133    int r;
1134    unsigned irq;
1135
1136    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1137     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1138    if (!cap_interrupt_level &&
1139        run->ready_for_interrupt_injection &&
1140        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1141        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1142    {
1143        /* For now KVM disregards the 'irq' argument. However, in the
1144         * future KVM could cache it in-kernel to avoid a heavyweight exit
1145         * when reading the UIC.
1146         */
1147        irq = KVM_INTERRUPT_SET;
1148
1149        DPRINTF("injected interrupt %d\n", irq);
1150        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1151        if (r < 0) {
1152            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1153        }
1154
1155        /* Always wake up soon in case the interrupt was level based */
1156        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1157                       (get_ticks_per_sec() / 50));
1158    }
1159
1160    /* We don't know if there are more interrupts pending after this. However,
1161     * the guest will return to userspace in the course of handling this one
1162     * anyways, so we will get a chance to deliver the rest. */
1163}
1164
1165void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1166{
1167}
1168
1169int kvm_arch_process_async_events(CPUState *cs)
1170{
1171    return cs->halted;
1172}
1173
1174static int kvmppc_handle_halt(PowerPCCPU *cpu)
1175{
1176    CPUState *cs = CPU(cpu);
1177    CPUPPCState *env = &cpu->env;
1178
1179    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1180        cs->halted = 1;
1181        cs->exception_index = EXCP_HLT;
1182    }
1183
1184    return 0;
1185}
1186
1187/* map dcr access to existing qemu dcr emulation */
1188static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1189{
1190    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1191        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1192
1193    return 0;
1194}
1195
1196static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1197{
1198    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1199        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1200
1201    return 0;
1202}
1203
1204int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1205{
1206    PowerPCCPU *cpu = POWERPC_CPU(cs);
1207    CPUPPCState *env = &cpu->env;
1208    int ret;
1209
1210    switch (run->exit_reason) {
1211    case KVM_EXIT_DCR:
1212        if (run->dcr.is_write) {
1213            DPRINTF("handle dcr write\n");
1214            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1215        } else {
1216            DPRINTF("handle dcr read\n");
1217            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1218        }
1219        break;
1220    case KVM_EXIT_HLT:
1221        DPRINTF("handle halt\n");
1222        ret = kvmppc_handle_halt(cpu);
1223        break;
1224#if defined(TARGET_PPC64)
1225    case KVM_EXIT_PAPR_HCALL:
1226        DPRINTF("handle PAPR hypercall\n");
1227        run->papr_hcall.ret = spapr_hypercall(cpu,
1228                                              run->papr_hcall.nr,
1229                                              run->papr_hcall.args);
1230        ret = 0;
1231        break;
1232#endif
1233    case KVM_EXIT_EPR:
1234        DPRINTF("handle epr\n");
1235        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1236        ret = 0;
1237        break;
1238    case KVM_EXIT_WATCHDOG:
1239        DPRINTF("handle watchdog expiry\n");
1240        watchdog_perform_action();
1241        ret = 0;
1242        break;
1243
1244    default:
1245        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1246        ret = -1;
1247        break;
1248    }
1249
1250    return ret;
1251}
1252
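/* OR the given bits into the guest's TSR (timer status register) through
 * the KVM_REG_PPC_OR_TSR one-reg interface. */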
1253int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1254{
1255    CPUState *cs = CPU(cpu);
1256    uint32_t bits = tsr_bits;
1257    struct kvm_one_reg reg = {
1258        .id = KVM_REG_PPC_OR_TSR,
1259        .addr = (uintptr_t) &bits,
1260    };
1261
1262    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1263}
1264
1265int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1266{
1267
1268    CPUState *cs = CPU(cpu);
1269    uint32_t bits = tsr_bits;
1270    struct kvm_one_reg reg = {
1271        .id = KVM_REG_PPC_CLEAR_TSR,
1272        .addr = (uintptr_t) &bits,
1273    };
1274
1275    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1276}
1277
1278int kvmppc_set_tcr(PowerPCCPU *cpu)
1279{
1280    CPUState *cs = CPU(cpu);
1281    CPUPPCState *env = &cpu->env;
1282    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1283
1284    struct kvm_one_reg reg = {
1285        .id = KVM_REG_PPC_TCR,
1286        .addr = (uintptr_t) &tcr,
1287    };
1288
1289    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1290}
1291
1292int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1293{
1294    CPUState *cs = CPU(cpu);
1295    struct kvm_enable_cap encap = {};
1296    int ret;
1297
1298    if (!kvm_enabled()) {
1299        return -1;
1300    }
1301
1302    if (!cap_ppc_watchdog) {
 1303        printf("warning: KVM does not support watchdog\n");
1304        return -1;
1305    }
1306
1307    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1308    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1309    if (ret < 0) {
1310        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1311                __func__, strerror(-ret));
1312        return ret;
1313    }
1314
1315    return ret;
1316}
1317
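/* Copy the first /proc/cpuinfo line starting with 'field' into 'value';
 * returns 0 on success, -1 if the field isn't found or the file can't be
 * opened. */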
1318static int read_cpuinfo(const char *field, char *value, int len)
1319{
1320    FILE *f;
1321    int ret = -1;
1322    int field_len = strlen(field);
1323    char line[512];
1324
1325    f = fopen("/proc/cpuinfo", "r");
1326    if (!f) {
1327        return -1;
1328    }
1329
1330    do {
 1331        if (!fgets(line, sizeof(line), f)) {
1332            break;
1333        }
1334        if (!strncmp(line, field, field_len)) {
1335            pstrcpy(value, len, line);
1336            ret = 0;
1337            break;
1338        }
 1339    } while (*line);
1340
1341    fclose(f);
1342
1343    return ret;
1344}
1345
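/* Parse the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo, falling back to get_ticks_per_sec() if it can't be
 * found. */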
1346uint32_t kvmppc_get_tbfreq(void)
1347{
1348    char line[512];
1349    char *ns;
1350    uint32_t retval = get_ticks_per_sec();
1351
1352    if (read_cpuinfo("timebase", line, sizeof(line))) {
1353        return retval;
1354    }
1355
1356    if (!(ns = strchr(line, ':'))) {
1357        return retval;
1358    }
1359
1360    ns++;
1361
1362    retval = atoi(ns);
1363    return retval;
1364}
1365
1366/* Try to find a device tree node for a CPU with clock-frequency property */
1367static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1368{
1369    struct dirent *dirp;
1370    DIR *dp;
1371
1372    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1373        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1374        return -1;
1375    }
1376
1377    buf[0] = '\0';
1378    while ((dirp = readdir(dp)) != NULL) {
1379        FILE *f;
1380        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1381                 dirp->d_name);
1382        f = fopen(buf, "r");
1383        if (f) {
1384            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1385            fclose(f);
1386            break;
1387        }
1388        buf[0] = '\0';
1389    }
1390    closedir(dp);
1391    if (buf[0] == '\0') {
1392        printf("Unknown host!\n");
1393        return -1;
1394    }
1395
1396    return 0;
1397}
1398
1399/* Read a CPU node property from the host device tree that's a single
1400 * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1401 * (can't find or open the property, or doesn't understand the
1402 * format) */
1403static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1404{
1405    char buf[PATH_MAX];
1406    union {
1407        uint32_t v32;
1408        uint64_t v64;
1409    } u;
1410    FILE *f;
1411    int len;
1412
1413    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1414        return -1;
1415    }
1416
 1417    strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
 1418    strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1419
1420    f = fopen(buf, "rb");
1421    if (!f) {
1422        return -1;
1423    }
1424
1425    len = fread(&u, 1, sizeof(u), f);
1426    fclose(f);
1427    switch (len) {
1428    case 4:
1429        /* property is a 32-bit quantity */
1430        return be32_to_cpu(u.v32);
1431    case 8:
1432        return be64_to_cpu(u.v64);
1433    }
1434
1435    return 0;
1436}
1437
1438uint64_t kvmppc_get_clockfreq(void)
1439{
1440    return kvmppc_read_int_cpu_dt("clock-frequency");
1441}
1442
1443uint32_t kvmppc_get_vmx(void)
1444{
1445    return kvmppc_read_int_cpu_dt("ibm,vmx");
1446}
1447
1448uint32_t kvmppc_get_dfp(void)
1449{
1450    return kvmppc_read_int_cpu_dt("ibm,dfp");
1451}
1452
1453static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
 1454{
 1455    PowerPCCPU *cpu = ppc_env_get_cpu(env);
 1456    CPUState *cs = CPU(cpu);
1457
1458    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1459        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1460        return 0;
1461    }
1462
1463    return 1;
1464}
1465
1466int kvmppc_get_hasidle(CPUPPCState *env)
1467{
1468    struct kvm_ppc_pvinfo pvinfo;
1469
1470    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1471        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1472        return 1;
1473    }
1474
1475    return 0;
1476}
1477
1478int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1479{
1480    uint32_t *hc = (uint32_t*)buf;
1481    struct kvm_ppc_pvinfo pvinfo;
1482
1483    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1484        memcpy(buf, pvinfo.hcall, buf_len);
1485        return 0;
1486    }
1487
1488    /*
1489     * Fallback to always fail hypercalls:
1490     *
1491     *     li r3, -1
1492     *     nop
1493     *     nop
1494     *     nop
1495     */
1496
1497    hc[0] = 0x3860ffff;
1498    hc[1] = 0x60000000;
1499    hc[2] = 0x60000000;
1500    hc[3] = 0x60000000;
1501
1502    return 0;
1503}
1504
1505void kvmppc_set_papr(PowerPCCPU *cpu)
1506{
1507    CPUState *cs = CPU(cpu);
1508    struct kvm_enable_cap cap = {};
1509    int ret;
1510
1511    cap.cap = KVM_CAP_PPC_PAPR;
1512    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1513
1514    if (ret) {
1515        cpu_abort(cs, "This KVM version does not support PAPR\n");
1516    }
1517
1518    /* Update the capability flag so we sync the right information
1519     * with kvm */
1520    cap_papr = 1;
1521}
1522
1523void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1524{
1525    CPUState *cs = CPU(cpu);
1526    struct kvm_enable_cap cap = {};
1527    int ret;
1528
1529    cap.cap = KVM_CAP_PPC_EPR;
1530    cap.args[0] = mpic_proxy;
1531    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1532
1533    if (ret && mpic_proxy) {
1534        cpu_abort(cs, "This KVM version does not support EPR\n");
1535    }
1536}
1537
1538int kvmppc_smt_threads(void)
1539{
1540    return cap_ppc_smt ? cap_ppc_smt : 1;
1541}
1542
1543#ifdef TARGET_PPC64
1544off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1545{
1546    void *rma;
1547    off_t size;
1548    int fd;
1549    struct kvm_allocate_rma ret;
1550    MemoryRegion *rma_region;
1551
1552    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1553     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1554     *                      not necessary on this hardware
1555     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1556     *
1557     * FIXME: We should allow the user to force contiguous RMA
1558     * allocation in the cap_ppc_rma==1 case.
1559     */
1560    if (cap_ppc_rma < 2) {
1561        return 0;
1562    }
1563
1564    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1565    if (fd < 0) {
1566        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1567                strerror(errno));
1568        return -1;
1569    }
1570
1571    size = MIN(ret.rma_size, 256ul << 20);
1572
1573    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1574    if (rma == MAP_FAILED) {
1575        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1576        return -1;
 1577    }
1578
1579    rma_region = g_new(MemoryRegion, 1);
1580    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1581    vmstate_register_ram_global(rma_region);
1582    memory_region_add_subregion(sysmem, 0, rma_region);
1583
1584    return size;
1585}
1586
1587uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1588{
1589    struct kvm_ppc_smmu_info info;
1590    long rampagesize, best_page_shift;
1591    int i;
1592
1593    if (cap_ppc_rma >= 2) {
1594        return current_size;
1595    }
1596
1597    /* Find the largest hardware supported page size that's less than
1598     * or equal to the (logical) backing page size of guest RAM */
1599    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1600    rampagesize = getrampagesize();
1601    best_page_shift = 0;
1602
1603    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1604        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1605
1606        if (!sps->page_shift) {
1607            continue;
1608        }
1609
1610        if ((sps->page_shift > best_page_shift)
1611            && ((1UL << sps->page_shift) <= rampagesize)) {
1612            best_page_shift = sps->page_shift;
1613        }
1614    }
1615
1616    return MIN(current_size,
1617               1ULL << (best_page_shift + hash_shift - 7));
1618}
1619#endif
1620
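/* Create an in-kernel TCE table for the given LIOBN and mmap() it into
 * QEMU.  Returns the mapping and stores the backing fd in *pfd, or
 * returns NULL (with *pfd left at -1) when the kernel can't provide
 * one. */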
1621void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1622{
1623    struct kvm_create_spapr_tce args = {
1624        .liobn = liobn,
1625        .window_size = window_size,
1626    };
1627    long len;
1628    int fd;
1629    void *table;
1630
1631    /* Must set fd to -1 so we don't try to munmap when called for
1632     * destroying the table, which the upper layers -will- do
1633     */
1634    *pfd = -1;
1635    if (!cap_spapr_tce) {
1636        return NULL;
1637    }
1638
1639    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1640    if (fd < 0) {
1641        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1642                liobn);
1643        return NULL;
1644    }
1645
1646    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1647    /* FIXME: round this up to page size */
1648
1649    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1650    if (table == MAP_FAILED) {
1651        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1652                liobn);
1653        close(fd);
1654        return NULL;
1655    }
1656
1657    *pfd = fd;
1658    return table;
1659}
1660
1661int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1662{
1663    long len;
1664
1665    if (fd < 0) {
1666        return -1;
1667    }
1668
1669    len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1670    if ((munmap(table, len) < 0) ||
1671        (close(fd) < 0)) {
 1672        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
1673                strerror(errno));
1674        /* Leak the table */
1675    }
1676
1677    return 0;
1678}
1679
1680int kvmppc_reset_htab(int shift_hint)
1681{
1682    uint32_t shift = shift_hint;
1683
1684    if (!kvm_enabled()) {
1685        /* Full emulation, tell caller to allocate htab itself */
1686        return 0;
1687    }
1688    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1689        int ret;
1690        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1691        if (ret == -ENOTTY) {
1692            /* At least some versions of PR KVM advertise the
1693             * capability, but don't implement the ioctl().  Oops.
1694             * Return 0 so that we allocate the htab in qemu, as is
1695             * correct for PR. */
1696            return 0;
1697        } else if (ret < 0) {
1698            return ret;
1699        }
1700        return shift;
1701    }
1702
1703    /* We have a kernel that predates the htab reset calls.  For PR
1704     * KVM, we need to allocate the htab ourselves, for an HV KVM of
1705     * this era, it has allocated a 16MB fixed size hash table
1706     * already.  Kernels of this era have the GET_PVINFO capability
1707     * only on PR, so we use this hack to determine the right
1708     * answer */
1709    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1710        /* PR - tell caller to allocate htab */
1711        return 0;
1712    } else {
1713        /* HV - assume 16MB kernel allocated htab */
1714        return 24;
1715    }
1716}
1717
1718static inline uint32_t mfpvr(void)
1719{
1720    uint32_t pvr;
1721
1722    asm ("mfpvr %0"
1723         : "=r"(pvr));
1724    return pvr;
1725}
1726
1727static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1728{
1729    if (on) {
1730        *word |= flags;
1731    } else {
1732        *word &= ~flags;
1733    }
1734}
1735
1736static void kvmppc_host_cpu_initfn(Object *obj)
1737{
1738    assert(kvm_enabled());
1739}
1740
1741static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1742{
1743    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1744    uint32_t vmx = kvmppc_get_vmx();
1745    uint32_t dfp = kvmppc_get_dfp();
1746    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1747    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1748
1749    /* Now fix up the class with information we can query from the host */
1750    pcc->pvr = mfpvr();
1751
1752    if (vmx != -1) {
1753        /* Only override when we know what the host supports */
1754        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1755        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1756    }
1757    if (dfp != -1) {
1758        /* Only override when we know what the host supports */
1759        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1760    }
1761
1762    if (dcache_size != -1) {
1763        pcc->l1_dcache_size = dcache_size;
1764    }
1765
1766    if (icache_size != -1) {
1767        pcc->l1_icache_size = icache_size;
1768    }
1769}
1770
1771bool kvmppc_has_cap_epr(void)
1772{
1773    return cap_epr;
1774}
1775
1776bool kvmppc_has_cap_htab_fd(void)
1777{
1778    return cap_htab_fd;
1779}
1780
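/* Register the "host" CPU class: look up the PowerPC CPU class matching
 * the host PVR (exact match first, then by mask) and register
 * TYPE_HOST_POWERPC_CPU as a subclass of it. */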
1781static int kvm_ppc_register_host_cpu_type(void)
1782{
1783    TypeInfo type_info = {
1784        .name = TYPE_HOST_POWERPC_CPU,
1785        .instance_init = kvmppc_host_cpu_initfn,
1786        .class_init = kvmppc_host_cpu_class_init,
1787    };
1788    uint32_t host_pvr = mfpvr();
1789    PowerPCCPUClass *pvr_pcc;
1790
1791    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1792    if (pvr_pcc == NULL) {
1793        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1794    }
1795    if (pvr_pcc == NULL) {
1796        return -1;
1797    }
1798    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1799    type_register(&type_info);
1800    return 0;
1801}
1802
1803int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1804{
1805    struct kvm_rtas_token_args args = {
1806        .token = token,
1807    };
1808
1809    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1810        return -ENOENT;
1811    }
1812
1813    strncpy(args.name, function, sizeof(args.name));
1814
1815    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1816}
1817
1818int kvmppc_get_htab_fd(bool write)
1819{
1820    struct kvm_get_htab_fd s = {
1821        .flags = write ? KVM_GET_HTAB_WRITE : 0,
1822        .start_index = 0,
1823    };
1824
1825    if (!cap_htab_fd) {
1826        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1827        return -1;
1828    }
1829
1830    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1831}
1832
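/* Stream hash page table chunks from the kernel's HTAB fd into the
 * migration stream, stopping when the kernel reports no more data or the
 * max_ns time budget is exceeded.  Returns 1 once the table has been read
 * completely, 0 if there is more to do, negative on error. */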
1833int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1834{
1835    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1836    uint8_t buf[bufsize];
1837    ssize_t rc;
1838
1839    do {
1840        rc = read(fd, buf, bufsize);
1841        if (rc < 0) {
1842            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1843                    strerror(errno));
1844            return rc;
1845        } else if (rc) {
 1846            /* Kernel already returns data in BE format for the file */
1847            qemu_put_buffer(f, buf, rc);
1848        }
1849    } while ((rc != 0)
1850             && ((max_ns < 0)
1851                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1852
1853    return (rc == 0) ? 1 : 0;
1854}
1855
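/* Write one incoming chunk (header plus n_valid HPTEs taken from the
 * migration stream) back to the kernel's HTAB fd. */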
1856int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1857                           uint16_t n_valid, uint16_t n_invalid)
1858{
1859    struct kvm_get_htab_header *buf;
1860    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1861    ssize_t rc;
1862
1863    buf = alloca(chunksize);
1864    /* This is KVM on ppc, so this is all big-endian */
1865    buf->index = index;
1866    buf->n_valid = n_valid;
1867    buf->n_invalid = n_invalid;
1868
1869    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1870
1871    rc = write(fd, buf, chunksize);
1872    if (rc < 0) {
1873        fprintf(stderr, "Error writing KVM hash table: %s\n",
1874                strerror(errno));
1875        return rc;
1876    }
1877    if (rc != chunksize) {
1878        /* We should never get a short write on a single chunk */
1879        fprintf(stderr, "Short write, restoring KVM hash table\n");
1880        return -1;
1881    }
1882    return 0;
1883}
1884
1885bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1886{
1887    return true;
1888}
1889
1890int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1891{
1892    return 1;
1893}
1894
1895int kvm_arch_on_sigbus(int code, void *addr)
1896{
1897    return 1;
1898}
1899
1900void kvm_arch_init_irq_routing(KVMState *s)
1901{
1902}
1903
1904int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1905{
1906    return -EINVAL;
1907}
1908
1909int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1910{
1911    return -EINVAL;
1912}
1913
1914int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1915{
1916    return -EINVAL;
1917}
1918
1919int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1920{
1921    return -EINVAL;
1922}
1923
1924void kvm_arch_remove_all_hw_breakpoints(void)
1925{
1926}
1927
1928void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1929{
1930}
1931
1932struct kvm_get_htab_buf {
1933    struct kvm_get_htab_header header;
1934    /*
1935     * We require one extra byte for read
1936     */
1937    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
1938};
1939
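/* Read one HPTE group starting at pte_index through a freshly opened HTAB
 * fd and return a token (a pointer into a kvm_get_htab_buf) that the
 * caller must release with kvmppc_hash64_free_pteg(). */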
1940uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1941{
1942    int htab_fd;
1943    struct kvm_get_htab_fd ghf;
1944    struct kvm_get_htab_buf  *hpte_buf;
1945
1946    ghf.flags = 0;
1947    ghf.start_index = pte_index;
1948    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1949    if (htab_fd < 0) {
1950        goto error_out;
1951    }
1952
1953    hpte_buf = g_malloc0(sizeof(*hpte_buf));
1954    /*
1955     * Read the hpte group
1956     */
1957    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1958        goto out_close;
1959    }
1960
1961    close(htab_fd);
1962    return (uint64_t)(uintptr_t) hpte_buf->hpte;
1963
1964out_close:
1965    g_free(hpte_buf);
1966    close(htab_fd);
1967error_out:
1968    return 0;
1969}
1970
1971void kvmppc_hash64_free_pteg(uint64_t token)
1972{
1973    struct kvm_get_htab_buf *htab_buf;
1974
1975    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1976                            hpte);
1977    g_free(htab_buf);
1978    return;
1979}
1980
1981void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1982                             target_ulong pte0, target_ulong pte1)
1983{
1984    int htab_fd;
1985    struct kvm_get_htab_fd ghf;
1986    struct kvm_get_htab_buf hpte_buf;
1987
1988    ghf.flags = 0;
1989    ghf.start_index = 0;     /* Ignored */
1990    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1991    if (htab_fd < 0) {
1992        goto error_out;
1993    }
1994
1995    hpte_buf.header.n_valid = 1;
1996    hpte_buf.header.n_invalid = 0;
1997    hpte_buf.header.index = pte_index;
1998    hpte_buf.hpte[0] = pte0;
1999    hpte_buf.hpte[1] = pte1;
2000    /*
2001     * Write the hpte entry.
2002     * CAUTION: write() has the warn_unused_result attribute. Hence we
 2003     * need to check the return value, even though we take no action on it.
2004     */
2005    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2006        goto out_close;
2007    }
2008
2009out_close:
2010    close(htab_fd);
2011    return;
2012
2013error_out:
2014    return;
2015}
2016