qemu/hw/ppc/spapr_irq.c
<<
>>
Prefs
   1/*
   2 * QEMU PowerPC sPAPR IRQ interface
   3 *
   4 * Copyright (c) 2018, IBM Corporation.
   5 *
   6 * This code is licensed under the GPL version 2 or later. See the
   7 * COPYING file in the top-level directory.
   8 */
   9
  10#include "qemu/osdep.h"
  11#include "qemu/log.h"
  12#include "qemu/error-report.h"
  13#include "qapi/error.h"
  14#include "hw/irq.h"
  15#include "hw/ppc/spapr.h"
  16#include "hw/ppc/spapr_cpu_core.h"
  17#include "hw/ppc/spapr_xive.h"
  18#include "hw/ppc/xics.h"
  19#include "hw/ppc/xics_spapr.h"
  20#include "hw/qdev-properties.h"
  21#include "cpu-models.h"
  22#include "sysemu/kvm.h"
  23
  24#include "trace.h"
  25
  26static const TypeInfo spapr_intc_info = {
  27    .name = TYPE_SPAPR_INTC,
  28    .parent = TYPE_INTERFACE,
  29    .class_size = sizeof(SpaprInterruptControllerClass),
  30};
  31
  32static void spapr_irq_msi_init(SpaprMachineState *spapr)
  33{
  34    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
  35        /* Legacy mode doesn't use this allocator */
  36        return;
  37    }
  38
  39    spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
  40    spapr->irq_map = bitmap_new(spapr->irq_map_nr);
  41}
  42
  43int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
  44                        Error **errp)
  45{
  46    int irq;
  47
  48    /*
  49     * The 'align_mask' parameter of bitmap_find_next_zero_area()
  50     * should be one less than a power of 2; 0 means no
  51     * alignment. Adapt the 'align' value of the former allocator
  52     * to fit the requirements of bitmap_find_next_zero_area()
  53     */
  54    align -= 1;
  55
  56    irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
  57                                     align);
  58    if (irq == spapr->irq_map_nr) {
  59        error_setg(errp, "can't find a free %d-IRQ block", num);
  60        return -1;
  61    }
  62
  63    bitmap_set(spapr->irq_map, irq, num);
  64
  65    return irq + SPAPR_IRQ_MSI;
  66}
  67
  68void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
  69{
  70    bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
  71}
  72
  73int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
  74                       SpaprInterruptController *intc,
  75                       uint32_t nr_servers,
  76                       Error **errp)
  77{
  78    Error *local_err = NULL;
  79
  80    if (kvm_enabled() && kvm_kernel_irqchip_allowed()) {
  81        if (fn(intc, nr_servers, &local_err) < 0) {
  82            if (kvm_kernel_irqchip_required()) {
  83                error_prepend(&local_err,
  84                              "kernel_irqchip requested but unavailable: ");
  85                error_propagate(errp, local_err);
  86                return -1;
  87            }
  88
  89            /*
  90             * We failed to initialize the KVM device, fallback to
  91             * emulated mode
  92             */
  93            error_prepend(&local_err,
  94                          "kernel_irqchip allowed but unavailable: ");
  95            error_append_hint(&local_err,
  96                              "Falling back to kernel-irqchip=off\n");
  97            warn_report_err(local_err);
  98        }
  99    }
 100
 101    return 0;
 102}
 103
 104/*
 105 * XICS IRQ backend.
 106 */
 107
 108SpaprIrq spapr_irq_xics = {
 109    .xics        = true,
 110    .xive        = false,
 111};
 112
 113/*
 114 * XIVE IRQ backend.
 115 */
 116
 117SpaprIrq spapr_irq_xive = {
 118    .xics        = false,
 119    .xive        = true,
 120};
 121
/*
 * Dual XIVE and XICS IRQ backend.
 *
 * The objects for both interrupt modes, XIVE and XICS, are created, but
 * the machine starts in legacy interrupt mode (XICS). The mode can be
 * changed by the CAS negotiation process and, in that case, the new mode
 * is activated after an extra machine reset.
 */
 130
 131/*
 132 * Define values in sync with the XIVE and XICS backend
 133 */
 134SpaprIrq spapr_irq_dual = {
 135    .xics        = true,
 136    .xive        = true,
 137};
 138
 139
 140static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
 141{
 142    ERRP_GUARD();
 143    MachineState *machine = MACHINE(spapr);
 144
 145    /*
 146     * Sanity checks on non-P9 machines. On these, XIVE is not
 147     * advertised, see spapr_dt_ov5_platform_support()
 148     */
 149    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
 150                               0, spapr->max_compat_pvr)) {
 151        /*
 152         * If the 'dual' interrupt mode is selected, force XICS as CAS
 153         * negotiation is useless.
 154         */
 155        if (spapr->irq == &spapr_irq_dual) {
 156            spapr->irq = &spapr_irq_xics;
 157            return 0;
 158        }
 159
 160        /*
 161         * Non-P9 machines using only XIVE is a bogus setup. We have two
 162         * scenarios to take into account because of the compat mode:
 163         *
 164         * 1. POWER7/8 machines should fail to init later on when creating
 165         *    the XIVE interrupt presenters because a POWER9 exception
 166         *    model is required.
 167
 168         * 2. POWER9 machines using the POWER8 compat mode won't fail and
 169         *    will let the OS boot with a partial XIVE setup : DT
 170         *    properties but no hcalls.
 171         *
 172         * To cover both and not confuse the OS, add an early failure in
 173         * QEMU.
 174         */
 175        if (!spapr->irq->xics) {
 176            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
 177            return -1;
 178        }
 179    }
 180
 181    /*
 182     * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
 183     * re-created. Same happens with KVM nested guests. Detect that early to
 184     * avoid QEMU to exit later when the guest reboots.
 185     */
 186    if (kvm_enabled() &&
 187        spapr->irq == &spapr_irq_dual &&
 188        kvm_kernel_irqchip_required() &&
 189        xics_kvm_has_broken_disconnect()) {
 190        error_setg(errp,
 191            "KVM is incompatible with ic-mode=dual,kernel-irqchip=on");
 192        error_append_hint(errp,
 193            "This can happen with an old KVM or in a KVM nested guest.\n");
 194        error_append_hint(errp,
 195            "Try without kernel-irqchip or with kernel-irqchip=off.\n");
 196        return -1;
 197    }
 198
 199    return 0;
 200}
 201
 202/*
 203 * sPAPR IRQ frontend routines for devices
 204 */
 205#define ALL_INTCS(spapr_) \
 206    { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), }
 207
 208int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
 209                              PowerPCCPU *cpu, Error **errp)
 210{
 211    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 212    int i;
 213    int rc;
 214
 215    for (i = 0; i < ARRAY_SIZE(intcs); i++) {
 216        SpaprInterruptController *intc = intcs[i];
 217        if (intc) {
 218            SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
 219            rc = sicc->cpu_intc_create(intc, cpu, errp);
 220            if (rc < 0) {
 221                return rc;
 222            }
 223        }
 224    }
 225
 226    return 0;
 227}
 228
 229void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu)
 230{
 231    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 232    int i;
 233
 234    for (i = 0; i < ARRAY_SIZE(intcs); i++) {
 235        SpaprInterruptController *intc = intcs[i];
 236        if (intc) {
 237            SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
 238            sicc->cpu_intc_reset(intc, cpu);
 239        }
 240    }
 241}
 242
 243void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu)
 244{
 245    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 246    int i;
 247
 248    for (i = 0; i < ARRAY_SIZE(intcs); i++) {
 249        SpaprInterruptController *intc = intcs[i];
 250        if (intc) {
 251            SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
 252            sicc->cpu_intc_destroy(intc, cpu);
 253        }
 254    }
 255}
 256
 257static void spapr_set_irq(void *opaque, int irq, int level)
 258{
 259    SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
 260    SpaprInterruptControllerClass *sicc
 261        = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 262
 263    sicc->set_irq(spapr->active_intc, irq, level);
 264}
 265
 266void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon)
 267{
 268    SpaprInterruptControllerClass *sicc
 269        = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 270
 271    sicc->print_info(spapr->active_intc, mon);
 272}
 273
 274void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
 275                  void *fdt, uint32_t phandle)
 276{
 277    SpaprInterruptControllerClass *sicc
 278        = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 279
 280    sicc->dt(spapr->active_intc, nr_servers, fdt, phandle);
 281}
 282
 283uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
 284{
 285    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 286
 287    if (smc->legacy_irq_allocation) {
 288        return smc->nr_xirqs;
 289    } else {
 290        return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
 291    }
 292}
 293
 294void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
 295{
 296    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 297
 298    if (kvm_enabled() && kvm_kernel_irqchip_split()) {
 299        error_setg(errp, "kernel_irqchip split mode not supported on pseries");
 300        return;
 301    }
 302
 303    if (spapr_irq_check(spapr, errp) < 0) {
 304        return;
 305    }
 306
 307    /* Initialize the MSI IRQ allocator. */
 308    spapr_irq_msi_init(spapr);
 309
 310    if (spapr->irq->xics) {
 311        Object *obj;
 312
 313        obj = object_new(TYPE_ICS_SPAPR);
 314
 315        object_property_add_child(OBJECT(spapr), "ics", obj);
 316        object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr),
 317                                 &error_abort);
 318        object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort);
 319        if (!qdev_realize(DEVICE(obj), NULL, errp)) {
 320            return;
 321        }
 322
 323        spapr->ics = ICS_SPAPR(obj);
 324    }
 325
 326    if (spapr->irq->xive) {
 327        uint32_t nr_servers = spapr_max_server_number(spapr);
 328        DeviceState *dev;
 329        int i;
 330
 331        dev = qdev_new(TYPE_SPAPR_XIVE);
 332        qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
 333        /*
 334         * 8 XIVE END structures per CPU. One for each available
 335         * priority
 336         */
 337        qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
 338        object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
 339                                 &error_abort);
 340        sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 341
 342        spapr->xive = SPAPR_XIVE(dev);
 343
 344        /* Enable the CPU IPIs */
 345        for (i = 0; i < nr_servers; ++i) {
 346            SpaprInterruptControllerClass *sicc
 347                = SPAPR_INTC_GET_CLASS(spapr->xive);
 348
 349            if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
 350                                false, errp) < 0) {
 351                return;
 352            }
 353        }
 354
 355        spapr_xive_hcall_init(spapr);
 356    }
 357
 358    spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
 359                                      smc->nr_xirqs + SPAPR_XIRQ_BASE);
 360
 361    /*
 362     * Mostly we don't actually need this until reset, except that not
 363     * having this set up can cause VFIO devices to issue a
 364     * false-positive warning during realize(), because they don't yet
 365     * have an in-kernel irq chip.
 366     */
 367    spapr_irq_update_active_intc(spapr);
 368}
 369
 370int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
 371{
 372    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 373    int i;
 374    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 375    int rc;
 376
 377    assert(irq >= SPAPR_XIRQ_BASE);
 378    assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
 379
 380    for (i = 0; i < ARRAY_SIZE(intcs); i++) {
 381        SpaprInterruptController *intc = intcs[i];
 382        if (intc) {
 383            SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
 384            rc = sicc->claim_irq(intc, irq, lsi, errp);
 385            if (rc < 0) {
 386                return rc;
 387            }
 388        }
 389    }
 390
 391    return 0;
 392}
 393
 394void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
 395{
 396    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 397    int i, j;
 398    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 399
 400    assert(irq >= SPAPR_XIRQ_BASE);
 401    assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
 402
 403    for (i = irq; i < (irq + num); i++) {
 404        for (j = 0; j < ARRAY_SIZE(intcs); j++) {
 405            SpaprInterruptController *intc = intcs[j];
 406
 407            if (intc) {
 408                SpaprInterruptControllerClass *sicc
 409                    = SPAPR_INTC_GET_CLASS(intc);
 410                sicc->free_irq(intc, i);
 411            }
 412        }
 413    }
 414}
 415
 416qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
 417{
 418    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 419
 420    /*
 421     * This interface is basically for VIO and PHB devices to find the
 422     * right qemu_irq to manipulate, so we only allow access to the
 423     * external irqs for now.  Currently anything which needs to
 424     * access the IPIs most naturally gets there via the guest side
 425     * interfaces, we can change this if we need to in future.
 426     */
 427    assert(irq >= SPAPR_XIRQ_BASE);
 428    assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
 429
 430    if (spapr->ics) {
 431        assert(ics_valid_irq(spapr->ics, irq));
 432    }
 433    if (spapr->xive) {
 434        assert(irq < spapr->xive->nr_irqs);
 435        assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
 436    }
 437
 438    return spapr->qirqs[irq];
 439}
 440
 441int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
 442{
 443    SpaprInterruptControllerClass *sicc;
 444
 445    spapr_irq_update_active_intc(spapr);
 446    sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 447    return sicc->post_load(spapr->active_intc, version_id);
 448}
 449
 450void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
 451{
 452    assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
 453
 454    spapr_irq_update_active_intc(spapr);
 455}
 456
 457int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
 458{
 459    const char *nodename = "interrupt-controller";
 460    int offset, phandle;
 461
 462    offset = fdt_subnode_offset(fdt, 0, nodename);
 463    if (offset < 0) {
 464        error_setg(errp, "Can't find node \"%s\": %s",
 465                   nodename, fdt_strerror(offset));
 466        return -1;
 467    }
 468
 469    phandle = fdt_get_phandle(fdt, offset);
 470    if (!phandle) {
 471        error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
 472        return -1;
 473    }
 474
 475    return phandle;
 476}
 477
 478static void set_active_intc(SpaprMachineState *spapr,
 479                            SpaprInterruptController *new_intc)
 480{
 481    SpaprInterruptControllerClass *sicc;
 482    uint32_t nr_servers = spapr_max_server_number(spapr);
 483
 484    assert(new_intc);
 485
 486    if (new_intc == spapr->active_intc) {
 487        /* Nothing to do */
 488        return;
 489    }
 490
 491    if (spapr->active_intc) {
 492        sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 493        if (sicc->deactivate) {
 494            sicc->deactivate(spapr->active_intc);
 495        }
 496    }
 497
 498    sicc = SPAPR_INTC_GET_CLASS(new_intc);
 499    if (sicc->activate) {
 500        sicc->activate(new_intc, nr_servers, &error_fatal);
 501    }
 502
 503    spapr->active_intc = new_intc;
 504
 505    /*
 506     * We've changed the kernel irqchip, let VFIO devices know they
 507     * need to readjust.
 508     */
 509    kvm_irqchip_change_notify();
 510}
 511
 512void spapr_irq_update_active_intc(SpaprMachineState *spapr)
 513{
 514    SpaprInterruptController *new_intc;
 515
 516    if (!spapr->ics) {
 517        /*
 518         * XXX before we run CAS, ov5_cas is initialized empty, which
 519         * indicates XICS, even if we have ic-mode=xive.  TODO: clean
 520         * up the CAS path so that we have a clearer way of handling
 521         * this.
 522         */
 523        new_intc = SPAPR_INTC(spapr->xive);
 524    } else if (spapr->ov5_cas
 525               && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
 526        new_intc = SPAPR_INTC(spapr->xive);
 527    } else {
 528        new_intc = SPAPR_INTC(spapr->ics);
 529    }
 530
 531    set_active_intc(spapr, new_intc);
 532}
 533
 534/*
 535 * XICS legacy routines - to deprecate one day
 536 */
 537
 538static int ics_find_free_block(ICSState *ics, int num, int alignnum)
 539{
 540    int first, i;
 541
 542    for (first = 0; first < ics->nr_irqs; first += alignnum) {
 543        if (num > (ics->nr_irqs - first)) {
 544            return -1;
 545        }
 546        for (i = first; i < first + num; ++i) {
 547            if (!ics_irq_free(ics, i)) {
 548                break;
 549            }
 550        }
 551        if (i == (first + num)) {
 552            return first;
 553        }
 554    }
 555
 556    return -1;
 557}
 558
 559int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
 560{
 561    ICSState *ics = spapr->ics;
 562    int first = -1;
 563
 564    assert(ics);
 565
 566    /*
 567     * MSIMesage::data is used for storing VIRQ so
 568     * it has to be aligned to num to support multiple
 569     * MSI vectors. MSI-X is not affected by this.
 570     * The hint is used for the first IRQ, the rest should
 571     * be allocated continuously.
 572     */
 573    if (align) {
 574        assert((num == 1) || (num == 2) || (num == 4) ||
 575               (num == 8) || (num == 16) || (num == 32));
 576        first = ics_find_free_block(ics, num, num);
 577    } else {
 578        first = ics_find_free_block(ics, num, 1);
 579    }
 580
 581    if (first < 0) {
 582        error_setg(errp, "can't find a free %d-IRQ block", num);
 583        return -1;
 584    }
 585
 586    return first + ics->offset;
 587}
 588
 589SpaprIrq spapr_irq_xics_legacy = {
 590    .xics        = true,
 591    .xive        = false,
 592};
 593
 594static void spapr_irq_register_types(void)
 595{
 596    type_register_static(&spapr_intc_info);
 597}
 598
 599type_init(spapr_irq_register_types)
 600