qemu/target/arm/kvm64.c
/*
 * ARM implementation of KVM hooks, 64 bit specific code
 *
 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
 * Copyright Alex Bennée 2014, Linaro
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <sys/ptrace.h>

#include <linux/elf.h>
#include <linux/kvm.h>

#include "qemu-common.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/host-utils.h"
#include "exec/gdbstub.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "internals.h"

static bool have_guest_debug;

/*
 * Although the ARM implementation of hardware assisted debugging
 * allows for different breakpoints per-core, the current GDB
 * interface treats them as a global pool of registers (which seems to
 * be the case for x86, ppc and s390). As a result we store one copy
 * of registers which is used for all active cores.
 *
 * Write access is serialised by virtue of the GDB protocol which
 * updates things. Read access (i.e. when the values are copied to the
 * vCPU) is also gated by GDB's run control.
 *
 * This is not unreasonable as most of the time debugging kernels you
 * never know which core will eventually execute your function.
 */

typedef struct {
    uint64_t bcr;
    uint64_t bvr;
} HWBreakpoint;

/* The watchpoint registers can cover more area than the requested
 * watchpoint so we need to store the additional information
 * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
 * when the watchpoint is hit.
 */
typedef struct {
    uint64_t wcr;
    uint64_t wvr;
    CPUWatchpoint details;
} HWWatchpoint;

/* Maximum and current break/watch point counts */
int max_hw_bps, max_hw_wps;
GArray *hw_breakpoints, *hw_watchpoints;

#define cur_hw_wps      (hw_watchpoints->len)
#define cur_hw_bps      (hw_breakpoints->len)
#define get_hw_bp(i)    (&g_array_index(hw_breakpoints, HWBreakpoint, i))
#define get_hw_wp(i)    (&g_array_index(hw_watchpoints, HWWatchpoint, i))

/**
 * kvm_arm_init_debug() - check for guest debug capabilities
 * @cs: CPUState
 *
 * kvm_check_extension returns the number of debug registers we have
 * or 0 if we have none.
 *
 */
static void kvm_arm_init_debug(CPUState *cs)
{
    have_guest_debug = kvm_check_extension(cs->kvm_state,
                                           KVM_CAP_SET_GUEST_DEBUG);

    max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS);
    hw_watchpoints = g_array_sized_new(true, true,
                                       sizeof(HWWatchpoint), max_hw_wps);

    max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS);
    hw_breakpoints = g_array_sized_new(true, true,
                                       sizeof(HWBreakpoint), max_hw_bps);
    return;
}

/**
 * insert_hw_breakpoint()
 * @addr: address of breakpoint
 *
 * See ARM ARM D2.9.1 for details but here we are only going to create
 * simple un-linked breakpoints (i.e. we don't chain breakpoints
 * together to match address and context or vmid). The hardware is
 * capable of fancier matching but that will require exposing that
 * fanciness to GDB's interface
 *
 * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
 *
 *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 *
 * BT: Breakpoint type (0 = unlinked address match)
 * LBN: Linked BP number (0 = unused)
 * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
 * BAS: Byte Address Select (RES1 for AArch64)
 * E: Enable bit
 *
 * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
 *
 *  63  53 52       49 48       2  1 0
 * +------+-----------+----------+-----+
 * | RESS | VA[52:49] | VA[48:2] | 0 0 |
 * +------+-----------+----------+-----+
 *
 * Depending on the addressing mode bits the top bits of the register
 * are a sign extension of the highest applicable VA bit. Some
 * versions of GDB don't do it correctly so we ensure they are correct
 * here so future PC comparisons will work properly.
 */

static int insert_hw_breakpoint(target_ulong addr)
{
    HWBreakpoint brk = {
        .bcr = 0x1,                             /* BCR E=1, enable */
        .bvr = sextract64(addr, 0, 53)
    };

    if (cur_hw_bps >= max_hw_bps) {
        return -ENOBUFS;
    }

    brk.bcr = deposit32(brk.bcr, 1, 2, 0x3);   /* PMC = 11 */
    brk.bcr = deposit32(brk.bcr, 5, 4, 0xf);   /* BAS = RES1 */

    g_array_append_val(hw_breakpoints, brk);

    return 0;
}
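
/*
 * Worked example (illustrative, for a hypothetical breakpoint address):
 * for addr = 0xffff800010080000 the code above produces
 *
 *   bvr = sextract64(addr, 0, 53)       -> 0xffff800010080000
 *   bcr = 0x1                            (E   = 1)
 *         deposit32(bcr, 1, 2, 0x3)      (PMC = 0b11)
 *         deposit32(bcr, 5, 4, 0xf)      (BAS = RES1)
 *                                        => bcr = 0x1e7
 *
 * i.e. a simple unlinked address-match breakpoint as described in the
 * comment block above.
 */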

/**
 * delete_hw_breakpoint()
 * @pc: address of breakpoint
 *
 * Delete a breakpoint and shuffle any above down
 */

static int delete_hw_breakpoint(target_ulong pc)
{
    int i;
    for (i = 0; i < hw_breakpoints->len; i++) {
        HWBreakpoint *brk = get_hw_bp(i);
        if (brk->bvr == pc) {
            g_array_remove_index(hw_breakpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

/**
 * insert_hw_watchpoint()
 * @addr: address of watch point
 * @len: size of area
 * @type: type of watch point
 *
 * See ARM ARM D2.10. As with the breakpoints we can do some advanced
 * stuff if we want to. The watch points can be linked with the break
 * points above to make them context aware. However for simplicity
 * currently we only deal with simple read/write watch points.
 *
 * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
 *
 *  31  29 28   24 23  21  20  19 16 15 14  13   12  5 4   3 2   1  0
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 *
 * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
 * WT: 0 - unlinked, 1 - linked (not currently used)
 * LBN: Linked BP number (not currently used)
 * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
 * BAS: Byte Address Select
 * LSC: Load/Store control (01: load, 10: store, 11: both)
 * E: Enable
 *
 * The bottom 2 bits of the value register are masked. Therefore to
 * break on any sizes smaller than an unaligned word you need to set
 * MASK=0, BAS=bit per byte in question. For larger regions (^2) you
 * need to ensure you mask the address as required and set BAS=0xff
 */

static int insert_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    HWWatchpoint wp = {
        .wcr = 1, /* E=1, enable */
        .wvr = addr & (~0x7ULL),
        .details = { .vaddr = addr, .len = len }
    };

    if (cur_hw_wps >= max_hw_wps) {
        return -ENOBUFS;
    }

    /*
     * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
     * valid whether EL3 is implemented or not
     */
    wp.wcr = deposit32(wp.wcr, 1, 2, 3);

    switch (type) {
    case GDB_WATCHPOINT_READ:
        wp.wcr = deposit32(wp.wcr, 3, 2, 1);
        wp.details.flags = BP_MEM_READ;
        break;
    case GDB_WATCHPOINT_WRITE:
        wp.wcr = deposit32(wp.wcr, 3, 2, 2);
        wp.details.flags = BP_MEM_WRITE;
        break;
    case GDB_WATCHPOINT_ACCESS:
        wp.wcr = deposit32(wp.wcr, 3, 2, 3);
        wp.details.flags = BP_MEM_ACCESS;
        break;
    default:
        g_assert_not_reached();
        break;
    }
    if (len <= 8) {
        /* we align the address and set the bits in BAS */
        int off = addr & 0x7;
        int bas = (1 << len) - 1;

        wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
    } else {
        /* For ranges above 8 bytes we need to be a power of 2 */
        if (is_power_of_2(len)) {
            int bits = ctz64(len);

            wp.wvr &= ~((1 << bits) - 1);
            wp.wcr = deposit32(wp.wcr, 24, 4, bits);
            wp.wcr = deposit32(wp.wcr, 5, 8, 0xff);
        } else {
            return -ENOBUFS;
        }
    }

    g_array_append_val(hw_watchpoints, wp);
    return 0;
}
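
/*
 * Worked examples (illustrative, hypothetical addresses), following the
 * encoding rules described above insert_hw_watchpoint():
 *
 *   4-byte write watchpoint at addr = 0x1002:
 *     wvr = 0x1000 (addr & ~7), off = 2, bas = 0xf
 *     wcr = 0x1 (E) | 0x6 (PAC = 0b11) | 0x10 (LSC = store)
 *               | (0xf << (5 + 2))  (BAS selects bytes 2-5)
 *         = 0x797
 *
 *   64-byte watchpoint on any access at addr = 0x2000:
 *     len is a power of 2, bits = 6, so wvr is masked to a 64-byte
 *     boundary, MASK = 6 and BAS = 0xff.
 */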


static bool check_watchpoint_in_range(int i, target_ulong addr)
{
    HWWatchpoint *wp = get_hw_wp(i);
    uint64_t addr_top, addr_bottom = wp->wvr;
    int bas = extract32(wp->wcr, 5, 8);
    int mask = extract32(wp->wcr, 24, 4);

    if (mask) {
        addr_top = addr_bottom + (1 << mask);
    } else {
        /* BAS must be contiguous but can offset against the base
         * address in DBGWVR */
        addr_bottom = addr_bottom + ctz32(bas);
        addr_top = addr_bottom + clo32(bas);
    }

    if (addr >= addr_bottom && addr <= addr_top) {
        return true;
    }

    return false;
}

/**
 * delete_hw_watchpoint()
 * @addr: address of watchpoint
 *
 * Delete a watchpoint and shuffle any above down
 */

static int delete_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    int i;
    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            g_array_remove_index(hw_watchpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}


int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return insert_hw_breakpoint(addr);
        break;
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return insert_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return delete_hw_breakpoint(addr);
        break;
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return delete_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}


void kvm_arch_remove_all_hw_breakpoints(void)
{
    if (cur_hw_wps > 0) {
        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
    }
    if (cur_hw_bps > 0) {
        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
    }
}

void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
{
    int i;
    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));

    for (i = 0; i < max_hw_wps; i++) {
        HWWatchpoint *wp = get_hw_wp(i);
        ptr->dbg_wcr[i] = wp->wcr;
        ptr->dbg_wvr[i] = wp->wvr;
    }
    for (i = 0; i < max_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        ptr->dbg_bcr[i] = bp->bcr;
        ptr->dbg_bvr[i] = bp->bvr;
    }
}

bool kvm_arm_hw_debug_active(CPUState *cs)
{
    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
}

static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
{
    int i;

    for (i = 0; i < cur_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        if (bp->bvr == pc) {
            return true;
        }
    }
    return false;
}

static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
{
    int i;

    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            return &get_hw_wp(i)->details;
        }
    }
    return NULL;
}

static bool kvm_arm_pmu_set_attr(CPUState *cs, struct kvm_device_attr *attr)
{
    int err;

    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("PMU: KVM_HAS_DEVICE_ATTR: %s", strerror(-err));
        return false;
    }

    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("PMU: KVM_SET_DEVICE_ATTR: %s", strerror(-err));
        return false;
    }

    return true;
}

void kvm_arm_pmu_init(CPUState *cs)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
        error_report("failed to init PMU");
        abort();
    }
}

void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .addr = (intptr_t)&irq,
        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
        error_report("failed to set irq for PMU");
        abort();
    }
}

static inline void set_feature(uint64_t *features, int feature)
{
    *features |= 1ULL << feature;
}

static inline void unset_feature(uint64_t *features, int feature)
{
    *features &= ~(1ULL << feature);
}

static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
    uint64_t ret;
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
    int err;

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
    if (err < 0) {
        return -1;
    }
    *pret = ret;
    return 0;
}

static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
{
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
}
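
/*
 * Illustrative note: the ARM64_SYS_REG() arguments used below are the
 * (op0, op1, CRn, CRm, op2) encoding of the system register, e.g.
 * ARM64_SYS_REG(3, 0, 0, 4, 0) is ID_AA64PFR0_EL1. Each helper above
 * issues KVM_GET_ONE_REG against the scratch vCPU fd, so a read
 * typically only fails when the host kernel does not expose that
 * register.
 */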

bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
    /* Identify the feature bits corresponding to the host CPU, and
     * fill out the ARMHostCPUClass fields accordingly. To do this
     * we have to create a scratch VM, create a single CPU inside it,
     * and then query that CPU for the relevant ID registers.
     */
    int fdarray[3];
    uint64_t features = 0;
    int err;

    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
     * we know these will only support creating one kind of guest CPU,
     * which is its preferred CPU type. Fortunately these old kernels
     * support only a very limited number of CPUs.
     */
    static const uint32_t cpus_to_try[] = {
        KVM_ARM_TARGET_AEM_V8,
        KVM_ARM_TARGET_FOUNDATION_V8,
        KVM_ARM_TARGET_CORTEX_A57,
        QEMU_KVM_ARM_TARGET_NONE
    };
    struct kvm_vcpu_init init;

    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
        return false;
    }

    ahcf->target = init.target;
    ahcf->dtb_compatible = "arm,arm-v8";

    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
                         ARM64_SYS_REG(3, 0, 0, 4, 0));
    if (unlikely(err < 0)) {
        /*
         * Before v4.15, the kernel only exposed a limited number of system
         * registers, not including any of the interesting AArch64 ID regs.
         * For the most part we could leave these fields as zero with minimal
         * effect, since this does not affect the values seen by the guest.
         *
         * However, it could cause problems down the line for QEMU,
         * so provide a minimal v8.0 default.
         *
         * ??? Could read MIDR and use knowledge from cpu64.c.
         * ??? Could map a page of memory into our temp guest and
         *     run the tiniest of hand-crafted kernels to extract
         *     the values seen by the guest.
         * ??? Either of these sounds like too much effort just
         *     to work around running a very old kernel.
         */
        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
        err = 0;
    } else {
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
                              ARM64_SYS_REG(3, 0, 0, 4, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
                              ARM64_SYS_REG(3, 0, 0, 6, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
                              ARM64_SYS_REG(3, 0, 0, 6, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 7, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 7, 1));

        /*
         * Note that if AArch32 support is not present in the host,
         * the AArch32 sysregs are present to be read, but will
         * return UNKNOWN values.  This is neither better nor worse
         * than skipping the reads and leaving 0, as we must avoid
         * considering the values in every case.
         */
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
                              ARM64_SYS_REG(3, 0, 0, 2, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
                              ARM64_SYS_REG(3, 0, 0, 2, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
                              ARM64_SYS_REG(3, 0, 0, 2, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
                              ARM64_SYS_REG(3, 0, 0, 2, 3));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
                              ARM64_SYS_REG(3, 0, 0, 2, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
                              ARM64_SYS_REG(3, 0, 0, 2, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
                              ARM64_SYS_REG(3, 0, 0, 2, 7));

        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
                              ARM64_SYS_REG(3, 0, 0, 3, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
                              ARM64_SYS_REG(3, 0, 0, 3, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 2));
    }

    kvm_arm_destroy_scratch_host_vcpu(fdarray);

    if (err < 0) {
        return false;
    }

    /* We can assume any KVM supporting CPU is at least a v8
     * with VFPv4+Neon; this in turn implies most of the other
     * feature bits.
     */
    set_feature(&features, ARM_FEATURE_V8);
    set_feature(&features, ARM_FEATURE_VFP4);
    set_feature(&features, ARM_FEATURE_NEON);
    set_feature(&features, ARM_FEATURE_AARCH64);
    set_feature(&features, ARM_FEATURE_PMU);

    ahcf->features = features;

    return true;
}

#define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5

int kvm_arch_init_vcpu(CPUState *cs)
{
    int ret;
    uint64_t mpidr;
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
        fprintf(stderr, "KVM is not supported for this guest CPU type\n");
        return -EINVAL;
    }

    /* Determine init features for this CPU */
    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
    if (cpu->start_powered_off) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
    }
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
        cpu->psci_version = 2;
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
    }
    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
    }
    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
        cpu->has_pmu = false;
    }
    if (cpu->has_pmu) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
    } else {
        unset_feature(&env->features, ARM_FEATURE_PMU);
    }

    /* Do KVM_ARM_VCPU_INIT ioctl */
    ret = kvm_arm_vcpu_init(cs);
    if (ret) {
        return ret;
    }

    /*
     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
     * Currently KVM has its own idea about MPIDR assignment, so we
     * override our defaults with what we get from KVM.
     */
    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
    if (ret) {
        return ret;
    }
    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;

    kvm_arm_init_debug(cs);

    /* Check whether user space can specify guest syndrome value */
    kvm_arm_init_serror_injection(cs);

    return kvm_arm_init_cpreg_list(cpu);
}

int kvm_arch_destroy_vcpu(CPUState *cs)
{
    return 0;
}

bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
    /* Return true if the regidx is a register we should synchronize
     * via the cpreg_tuples array (ie is not a core reg we sync by
     * hand in kvm_arch_get/put_registers())
     */
    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
    case KVM_REG_ARM_CORE:
        return false;
    default:
        return true;
    }
}

typedef struct CPRegStateLevel {
    uint64_t regidx;
    int level;
} CPRegStateLevel;

/* All system registers not listed in the following table are assumed to be
 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 * often, you must add it to this table with a state of either
 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 */
static const CPRegStateLevel non_runtime_cpregs[] = {
    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
};
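
/*
 * Rationale (informal, not part of the table above): KVM_REG_ARM_TIMER_CNT
 * holds the guest's view of the virtual counter, so rewriting it on every
 * runtime sync would make guest time jump around; limiting it to
 * KVM_PUT_FULL_STATE means it is only written back on full restores such
 * as an incoming migration.
 */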

int kvm_arm_cpreg_level(uint64_t regidx)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
        const CPRegStateLevel *l = &non_runtime_cpregs[i];
        if (l->regidx == regidx) {
            return l->level;
        }
    }

    return KVM_PUT_RUNTIME_STATE;
}

#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

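/*
 * Illustrative note: KVM_REG_ARM_CORE_REG(x) is, in the kernel UAPI
 * headers, the offset of field x within struct kvm_regs divided by
 * sizeof(__u32), so e.g. AARCH64_CORE_REG(regs.pc) names the 64-bit
 * "pc" slot of the core register block for KVM_GET/SET_ONE_REG.
 */
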
int kvm_arch_put_registers(CPUState *cs, int level)
{
    struct kvm_one_reg reg;
    uint32_t fpr;
    uint64_t val;
    int i;
    int ret;
    unsigned int el;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
     * AArch64 registers before pushing them out to 64-bit KVM.
     */
    if (!is_a64(env)) {
        aarch64_sync_32_to_64(env);
    }

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_save_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
    if (is_a64(env)) {
        val = pstate_read(env);
    } else {
        val = cpsr_read(env);
    }
    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Saved Program State Registers
     *
     * Before we restore from the banked_spsr[] array we need to
     * ensure that any modifications to env->spsr are correctly
     * reflected in the banks.
     */
    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->banked_spsr[i] = env->spsr;
    }

    /* KVM 0-4 map to QEMU banks 1-5 */
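    /*
     * (Informally: KVM's spsr[] slots are ordered SVC/EL1, ABT, UND,
     * IRQ, FIQ, while QEMU reserves banked_spsr[0] for the USR/SYS
     * bank, hence the i + 1 offset below.)
     */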
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /* Advanced SIMD and FP registers. */
    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
#ifdef HOST_WORDS_BIGENDIAN
        uint64_t fp_val[2] = { q[1], q[0] };
        reg.addr = (uintptr_t)fp_val;
#else
        reg.addr = (uintptr_t)q;
#endif
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpsr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    fpr = vfp_get_fpcr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    ret = kvm_put_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    write_cpustate_to_list(cpu, true);

    if (!write_list_to_kvmstate(cpu, level)) {
        return -EINVAL;
    }

    kvm_arm_sync_mpstate_to_kvm(cpu);

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    struct kvm_one_reg reg;
    uint64_t val;
    uint32_t fpr;
    unsigned int el;
    int i;
    int ret;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    env->aarch64 = ((val & PSTATE_nRW) == 0);
    if (is_a64(env)) {
        pstate_write(env, val);
    } else {
        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_restore_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
     * incoming AArch64 regs received from 64-bit KVM.
     * We must perform this after all of the registers have been acquired from
     * the kernel.
     */
    if (!is_a64(env)) {
        aarch64_sync_64_to_32(env);
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Fetch the SPSR registers
     *
     * KVM SPSRs 0-4 map to QEMU banks 1-5
     */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->spsr = env->banked_spsr[i];
    }

    /* Advanced SIMD and FP registers */
    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        reg.addr = (uintptr_t)q;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        } else {
#ifdef HOST_WORDS_BIGENDIAN
            uint64_t t;
            t = q[0], q[0] = q[1], q[1] = t;
#endif
        }
    }

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpsr(env, fpr);

    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpcr(env, fpr);

    ret = kvm_get_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    if (!write_kvmstate_to_list(cpu)) {
        return -EINVAL;
    }
    /* Note that it's OK to have registers which aren't in CPUState,
     * so we can ignore a failure return here.
     */
    write_list_to_cpustate(cpu);

    kvm_arm_sync_mpstate_to_qemu(cpu);

    /* TODO: other registers */
    return ret;
}

/* C6.6.29 BRK instruction */
static const uint32_t brk_insn = 0xd4200000;
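/* (For reference: A64 BRK #imm16 encodes as 0xd4200000 | (imm16 << 5),
 * so this is BRK #0.)
 */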

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static uint32_t brk;

    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
            brk != brk_insn ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
 *
 * To minimise translating between kernel and user-space the kernel
 * ABI just provides user-space with the full exception syndrome
 * register value to be decoded in QEMU.
 */
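
/*
 * (The exception class switched on below is ESR_ELx[31:26], which is
 * what syn_get_ec() extracts from debug_exit->hsr.)
 */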

bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
{
    int hsr_ec = syn_get_ec(debug_exit->hsr);
    ARMCPU *cpu = ARM_CPU(cs);
    CPUClass *cc = CPU_GET_CLASS(cs);
    CPUARMState *env = &cpu->env;

    /* Ensure PC is synchronised */
    kvm_cpu_synchronize_state(cs);

    switch (hsr_ec) {
    case EC_SOFTWARESTEP:
        if (cs->singlestep_enabled) {
            return true;
        } else {
            /*
             * The kernel should have suppressed the guest's ability to
             * single step at this point so something has gone wrong.
             */
            error_report("%s: guest single-step while debugging unsupported"
                         " (%"PRIx64", %"PRIx32")",
                         __func__, env->pc, debug_exit->hsr);
            return false;
        }
        break;
    case EC_AA64_BKPT:
        if (kvm_find_sw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_BREAKPOINT:
        if (find_hw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_WATCHPOINT:
    {
        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
        if (wp) {
            cs->watchpoint_hit = wp;
            return true;
        }
        break;
    }
    default:
        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
                     __func__, debug_exit->hsr, env->pc);
    }

    /* If we are not handling the debug exception it must belong to
     * the guest. Let's re-use the existing TCG interrupt code to set
     * everything up properly.
     */
    cs->exception_index = EXCP_BKPT;
    env->exception.syndrome = debug_exit->hsr;
    env->exception.vaddress = debug_exit->far;
    env->exception.target_el = 1;
    qemu_mutex_lock_iothread();
    cc->do_interrupt(cs);
    qemu_mutex_unlock_iothread();

    return false;
}