qemu/hw/intc/spapr_xive_kvm.c
/*
 * QEMU PowerPC sPAPR XIVE interrupt controller model
 *
 * Copyright (c) 2017-2019, IBM Corporation.
 *
 * This code is licensed under the GPL version 2 or later. See the
 * COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "target/ppc/cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/spapr_xive.h"
#include "hw/ppc/xive.h"
#include "kvm_ppc.h"
#include "trace.h"

#include <sys/ioctl.h>

/*
 * Helpers for CPU hotplug
 *
 * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
 */
typedef struct KVMEnabledCPU {
    unsigned long vcpu_id;
    QLIST_ENTRY(KVMEnabledCPU) node;
} KVMEnabledCPU;

static QLIST_HEAD(, KVMEnabledCPU)
    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);

static bool kvm_cpu_is_enabled(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
        if (enabled_cpu->vcpu_id == vcpu_id) {
            return true;
        }
    }
    return false;
}

static void kvm_cpu_enable(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
    enabled_cpu->vcpu_id = vcpu_id;
    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
}

static void kvm_cpu_disable_all(void)
{
    KVMEnabledCPU *enabled_cpu, *next;

    QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
        QLIST_REMOVE(enabled_cpu, node);
        g_free(enabled_cpu);
    }
}

/*
 * XIVE Thread Interrupt Management context (KVM)
 */

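/*
 * The OS ring state (word0 and word1 of the OS ring, i.e. the 8 bytes
 * at TM_QW1_OS) is exchanged with KVM through the KVM_REG_PPC_VP_STATE
 * one_reg. The register is two 64-bit words wide, but only the first
 * word is consumed by the helpers below.
 */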
int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    uint64_t state[2];
    int ret;

    assert(xive->fd != -1);

    /* word0 and word1 of the OS ring. */
    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);

    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: could not restore KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return ret;
    }

    return 0;
}

int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    uint64_t state[2] = { 0 };
    int ret;

    assert(xive->fd != -1);

    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: could not capture KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return ret;
    }

    /* word0 and word1 of the OS ring. */
    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];

    return 0;
}

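/*
 * run_on_cpu() only passes a single opaque pointer, so the arguments
 * and the return value of the state capture are bundled in a small
 * structure.
 */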
typedef struct {
    XiveTCTX *tctx;
    Error **errp;
    int ret;
} XiveCpuGetState;

static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
                                                 run_on_cpu_data arg)
{
    XiveCpuGetState *s = arg.host_ptr;

    s->ret = kvmppc_xive_cpu_get_state(s->tctx, s->errp);
}

int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
{
    XiveCpuGetState s = {
        .tctx = tctx,
        .errp = errp,
    };

    /*
     * Kick the vCPU to make sure it is available for the KVM ioctl.
     */
    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
               RUN_ON_CPU_HOST_PTR(&s));

    return s.ret;
}

int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
{
    ERRP_GUARD();
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    unsigned long vcpu_id;
    int ret;

    assert(xive->fd != -1);

    /* Check if CPU was hot unplugged and replugged. */
    if (kvm_cpu_is_enabled(tctx->cs)) {
        return 0;
    }

    vcpu_id = kvm_arch_vcpu_id(tctx->cs);

    trace_kvm_xive_cpu_connect(vcpu_id);

    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
                              vcpu_id, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: unable to connect CPU%ld to KVM device",
                         vcpu_id);
        if (ret == -ENOSPC) {
            error_append_hint(errp, "Try -smp maxcpus=N with N < %u\n",
                              MACHINE(qdev_get_machine())->smp.max_cpus);
        }
        return ret;
    }

    kvm_cpu_enable(tctx->cs);
    return 0;
}

/*
 * XIVE Interrupt Source (KVM)
 */

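/*
 * Push the routing of a source, as described by its EAS (target END,
 * priority, masked state and EISN data), down to the KVM XIVE device
 * using the KVM_XIVE_SOURCE_* encoding of the kernel UAPI.
 */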
int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
                                  Error **errp)
{
    uint32_t end_idx;
    uint32_t end_blk;
    uint8_t priority;
    uint32_t server;
    bool masked;
    uint32_t eisn;
    uint64_t kvm_src;

    assert(xive_eas_is_valid(eas));

    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
    eisn = xive_get_field64(EAS_END_DATA, eas->w);
    masked = xive_eas_is_masked(eas);

    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
        KVM_XIVE_SOURCE_PRIORITY_MASK;
    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
        KVM_XIVE_SOURCE_SERVER_MASK;
    kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
        KVM_XIVE_SOURCE_MASKED_MASK;
    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
        KVM_XIVE_SOURCE_EISN_MASK;

    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
                             &kvm_src, true, errp);
}

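/*
 * Ask KVM to synchronize the source, flushing any event notification
 * still in flight so that it is fully delivered to its event queue.
 */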
void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
                      NULL, true, errp);
}

/*
 * At reset, the interrupt sources are simply created and MASKED. We
 * only need to inform the KVM XIVE device about their type: LSI or
 * MSI.
 */
int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    uint64_t state = 0;

    trace_kvm_xive_source_reset(srcno);

    assert(xive->fd != -1);

    if (xive_source_irq_is_lsi(xsrc, srcno)) {
        state |= KVM_XIVE_LEVEL_SENSITIVE;
        if (xive_source_is_asserted(xsrc, srcno)) {
            state |= KVM_XIVE_LEVEL_ASSERTED;
        }
    }

    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
                             true, errp);
}

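/*
 * Reset all sources in KVM. Sources without a valid EAS, i.e. sources
 * that have not been claimed by the guest, are skipped.
 */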
static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        int ret;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        ret = kvmppc_xive_source_reset_one(xsrc, i, errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * This is used to perform the magic loads on the ESB pages, described
 * in xive.h.
 *
 * Memory barriers should not be needed for loads (no store for now).
 */
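/*
 * The 2-bit PQ state of a source lives in its ESB pages. A load at one
 * of the XIVE_ESB_GET / XIVE_ESB_SET_PQ_* offsets returns the previous
 * PQ value and, for the SET_PQ variants, atomically installs a new
 * one. See the description of the ESB MMIO protocol in xive.h.
 */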
static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
        offset;

    if (write) {
        *addr = cpu_to_be64(data);
        return -1;
    } else {
        /* Prevent the compiler from optimizing away the load */
        volatile uint64_t value = be64_to_cpu(*addr);
        return value;
    }
}

static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
{
    return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
}

static void kvmppc_xive_esb_trigger(XiveSource *xsrc, int srcno)
{
    xive_esb_rw(xsrc, srcno, 0, 0, true);
}

uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    if (write) {
        return xive_esb_rw(xsrc, srcno, offset, data, 1);
    }

    /*
     * Special Load EOI handling for LSI sources. Q bit is never set
     * and the interrupt should be re-triggered if the level is still
     * asserted.
     */
    if (xive_source_irq_is_lsi(xsrc, srcno) &&
        offset == XIVE_ESB_LOAD_EOI) {
        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
        if (xive_source_is_asserted(xsrc, srcno)) {
            kvmppc_xive_esb_trigger(xsrc, srcno);
        }
        return 0;
    } else {
        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
    }
}

static void kvmppc_xive_source_get_state(XiveSource *xsrc)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        /* Perform a load without side effect to retrieve the PQ bits */
        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /* and save PQ locally */
        xive_source_esb_set(xsrc, i, pq);
    }
}

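/*
 * qemu_irq handler backing the source lines when KVM is in use. MSIs
 * only fire on a non-zero value, while LSIs record the new level
 * before the trigger is forwarded to the ESB page.
 */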
void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
{
    XiveSource *xsrc = opaque;

    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
        if (!val) {
            return;
        }
    } else {
        xive_source_set_asserted(xsrc, srcno, val);
    }

    kvmppc_xive_esb_trigger(xsrc, srcno);
}

/*
 * sPAPR XIVE interrupt controller (KVM)
 */
int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
                                 uint32_t end_idx, XiveEND *end,
                                 Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    int ret;

    assert(xive_end_is_valid(end));

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
            KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                            &kvm_eq, false, errp);
    if (ret < 0) {
        return ret;
    }

    /*
     * The EQ index and toggle bit are updated by HW. These are the
     * only fields from KVM we want to update QEMU with. The other END
     * fields should already be in the QEMU END table.
     */
    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);

    return 0;
}

int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
                                 uint32_t end_idx, XiveEND *end,
                                 Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;

    /*
     * Build the KVM state from the local END structure.
     */

    kvm_eq.flags = 0;
    if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
        kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
    }

    /*
     * If the hcall is disabling the EQ, set the size and page address
     * to zero. When migrating, only valid ENDs are taken into
     * account.
     */
    if (xive_end_is_valid(end)) {
        kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
        kvm_eq.qaddr  = xive_end_qaddr(end);
        /*
         * The EQ toggle bit and index should only be relevant when
         * restoring the EQ state
         */
        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
        kvm_eq.qindex  = xive_get_field32(END_W1_PAGE_OFF, end->w1);
    } else {
        kvm_eq.qshift = 0;
        kvm_eq.qaddr  = 0;
    }

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
            KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    return
        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                          &kvm_eq, true, errp);
}

void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
                      NULL, true, errp);
}

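/*
 * Read back the configuration of all valid ENDs from KVM. This is
 * used when synchronizing the device state and in the 'pre_save'
 * migration path.
 */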
static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
{
    int i;
    int ret;

    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        ret = kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                           &xive->endt[i], errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * The primary goal of the XIVE VM change handler is to mark the EQ
 * pages dirty when all XIVE event notifications have stopped.
 *
 * Whenever the VM is stopped, the VM change handler sets the source
 * PQs to PENDING to stop the flow of events and to possibly catch a
 * triggered interrupt occurring while the VM is stopped. The previous
 * state is saved in anticipation of a migration. The XIVE controller
 * is then synced through KVM to flush any in-flight event
 * notification and stabilize the EQs.
 *
 * At this stage, we can mark the EQ page dirty and let a migration
 * sequence transfer the EQ pages to the destination, which is done
 * just after the stop state.
 *
 * The previous configuration of the sources is restored when the VM
 * runs again. If an interrupt was queued while the VM was stopped,
 * simply generate a trigger.
 */
static void kvmppc_xive_change_state_handler(void *opaque, bool running,
                                             RunState state)
{
    SpaprXive *xive = opaque;
    XiveSource *xsrc = &xive->source;
    Error *local_err = NULL;
    int i;

    /*
     * Restore the sources to their initial state. This is called when
     * the VM resumes after a stop or a migration.
     */
    if (running) {
        for (i = 0; i < xsrc->nr_irqs; i++) {
            uint8_t pq;
            uint8_t old_pq;

            if (!xive_eas_is_valid(&xive->eat[i])) {
                continue;
            }

            pq = xive_source_esb_get(xsrc, i);
            old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));

            /*
             * An interrupt was queued while the VM was stopped,
             * generate a trigger.
             */
            if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
                kvmppc_xive_esb_trigger(xsrc, i);
            }
        }

        return;
    }

    /*
     * Mask the sources, to stop the flow of event notifications, and
     * save the PQs locally in the XiveSource object. The XiveSource
     * state will be collected later on by its vmstate handler if a
     * migration is in progress.
     */
    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /*
         * PQ is set to PENDING to possibly catch a triggered
         * interrupt occurring while the VM is stopped (hotplug event
         * for instance).
         */
        if (pq != XIVE_ESB_OFF) {
            pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
        }
        xive_source_esb_set(xsrc, i, pq);
    }

    /*
     * Sync the XIVE controller in KVM, to flush in-flight event
     * notification that should be enqueued in the EQs and mark the
     * XIVE EQ pages dirty to collect all updates.
     */
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                      KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return;
    }
}

void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
{
    assert(xive->fd != -1);

    /*
     * When the VM is stopped, the sources are masked and the previous
     * state is saved in anticipation of a migration. We should not
     * synchronize the source state in that case, else we would
     * overwrite the saved state.
     */
    if (runstate_is_running()) {
        kvmppc_xive_source_get_state(&xive->source);
    }

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    kvmppc_xive_get_queues(xive, errp);
}

/*
 * The SpaprXive 'pre_save' method is called by the vmstate handler of
 * the SpaprXive model, after the XIVE controller is synced in the VM
 * change handler.
 */
int kvmppc_xive_pre_save(SpaprXive *xive)
{
    Error *local_err = NULL;
    int ret;

    assert(xive->fd != -1);

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    ret = kvmppc_xive_get_queues(xive, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        return ret;
    }

    return 0;
}

/*
 * The SpaprXive 'post_load' method is not called by a vmstate
 * handler. It is called at the sPAPR machine level at the end of the
 * migration sequence by the sPAPR IRQ backend 'post_load' method,
 * when all XIVE states have been transferred and loaded.
 */
int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
{
    Error *local_err = NULL;
    CPUState *cs;
    int i;
    int ret;

    /* The KVM XIVE device should be in use */
    assert(xive->fd != -1);

    /* Restore the ENDT first. The targeting depends on it. */
    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        ret = kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                           &xive->endt[i], &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Restore the EAT */
    for (i = 0; i < xive->nr_irqs; i++) {
        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        /*
         * We can only restore the source config if the source has been
         * previously set in KVM. Since we don't do that for all interrupts
         * at reset time anymore, let's do it now.
         */
        ret = kvmppc_xive_source_reset_one(&xive->source, i, &local_err);
        if (ret < 0) {
            goto fail;
        }

        ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /*
     * Restore the thread interrupt contexts of initial CPUs.
     *
     * The context of hotplugged CPUs is restored later, by the
     * 'post_load' handler of the XiveTCTX model because they are not
     * available at the time the SpaprXive 'post_load' method is
     * called. We can not restore the context of all CPUs in the
     * 'post_load' handler of XiveTCTX because the machine is not
     * necessarily connected to the KVM device at that time.
     */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        ret = kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* The source states will be restored when the machine starts running */
    return 0;

fail:
    error_report_err(local_err);
    return ret;
}

/* Returns MAP_FAILED on error and sets errno */
static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
                              Error **errp)
{
    void *addr;
    uint32_t page_shift = 16; /* TODO: fix page_shift */

    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
                pgoff << page_shift);
    if (addr == MAP_FAILED) {
        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
    }

    return addr;
}

/*
 * All the XIVE memory regions are now backed by mappings from the KVM
 * XIVE device.
 */
int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
                        Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc = &xive->source;
    size_t esb_len = xive_source_esb_len(xsrc);
    size_t tima_len = 4ull << TM_SHIFT;
    CPUState *cs;
    int fd;
    void *addr;
    int ret;

    /*
     * The KVM XIVE device is already in use. This is the case when
     * rebooting under the XIVE-only interrupt mode.
     */
    if (xive->fd != -1) {
        return 0;
    }

    if (!kvmppc_has_cap_xive()) {
        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
        return -1;
    }

    /* First, create the KVM XIVE device */
    fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
    if (fd < 0) {
        error_setg_errno(errp, -fd, "XIVE: error creating KVM device");
        return -1;
    }
    xive->fd = fd;

    /* Tell KVM about the # of VCPUs we may have */
    if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                              KVM_DEV_XIVE_NR_SERVERS)) {
        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                                KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true,
                                errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /*
     * 1. Source ESB pages - KVM mapping
     */
    addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, errp);
    if (addr == MAP_FAILED) {
        goto fail;
    }
    xsrc->esb_mmap = addr;

    memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc),
                                      "xive.esb-kvm", esb_len, xsrc->esb_mmap);
    memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0,
                                        &xsrc->esb_mmio_kvm, 1);

    /*
     * 2. END ESB pages (No KVM support yet)
     */

    /*
     * 3. TIMA pages - KVM mapping
     */
    addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, errp);
    if (addr == MAP_FAILED) {
        goto fail;
    }
    xive->tm_mmap = addr;

    memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive),
                                      "xive.tima", tima_len, xive->tm_mmap);
    memory_region_add_subregion_overlap(&xive->tm_mmio, 0,
                                        &xive->tm_mmio_kvm, 1);

    xive->change = qemu_add_vm_change_state_handler(
        kvmppc_xive_change_state_handler, xive);

    /* Connect the presenters to the initial VCPUs of the machine */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        ret = kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Update the KVM sources */
    ret = kvmppc_xive_source_reset(xsrc, errp);
    if (ret < 0) {
        goto fail;
    }

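    /*
     * Advertise the in-kernel irqchip so that MSIs can be routed
     * through KVM irqfds and guest interrupt numbers map directly to
     * the XIVE source numbers.
     */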
    kvm_kernel_irqchip = true;
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_direct_mapping = true;
    return 0;

fail:
    kvmppc_xive_disconnect(intc);
    return -1;
}

void kvmppc_xive_disconnect(SpaprInterruptController *intc)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc;
    size_t esb_len;

    assert(xive->fd != -1);

    /* Clear the KVM mapping */
    xsrc = &xive->source;
    esb_len = xive_source_esb_len(xsrc);

    if (xsrc->esb_mmap) {
        memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm);
        object_unparent(OBJECT(&xsrc->esb_mmio_kvm));
        munmap(xsrc->esb_mmap, esb_len);
        xsrc->esb_mmap = NULL;
    }

    if (xive->tm_mmap) {
        memory_region_del_subregion(&xive->tm_mmio, &xive->tm_mmio_kvm);
        object_unparent(OBJECT(&xive->tm_mmio_kvm));
        munmap(xive->tm_mmap, 4ull << TM_SHIFT);
        xive->tm_mmap = NULL;
    }

    /*
     * When the KVM device fd is closed, the KVM device is destroyed
     * and removed from the list of devices of the VM. The VCPU
     * presenters are also detached from the device.
     */
    close(xive->fd);
    xive->fd = -1;

    kvm_kernel_irqchip = false;
    kvm_msi_via_irqfd_allowed = false;
    kvm_gsi_direct_mapping = false;

    /* Clear the local list of presenters (hotplug) */
    kvm_cpu_disable_all();

    /* VM Change state handler is not needed anymore */
    if (xive->change) {
        qemu_del_vm_change_state_handler(xive->change);
        xive->change = NULL;
    }
}