qemu/hw/ppc/spapr_irq.c
<<
>>
Prefs
   1/*
   2 * QEMU PowerPC sPAPR IRQ interface
   3 *
   4 * Copyright (c) 2018, IBM Corporation.
   5 *
   6 * This code is licensed under the GPL version 2 or later. See the
   7 * COPYING file in the top-level directory.
   8 */
   9
  10#include "qemu/osdep.h"
  11#include "qemu/log.h"
  12#include "qemu/error-report.h"
  13#include "qapi/error.h"
  14#include "hw/irq.h"
  15#include "hw/ppc/spapr.h"
  16#include "hw/ppc/spapr_cpu_core.h"
  17#include "hw/ppc/spapr_xive.h"
  18#include "hw/ppc/xics.h"
  19#include "hw/ppc/xics_spapr.h"
  20#include "hw/qdev-properties.h"
  21#include "cpu-models.h"
  22#include "sysemu/kvm.h"
  23
  24#include "trace.h"
  25
  26static const TypeInfo spapr_intc_info = {
  27    .name = TYPE_SPAPR_INTC,
  28    .parent = TYPE_INTERFACE,
  29    .class_size = sizeof(SpaprInterruptControllerClass),
  30};
  31
  32static void spapr_irq_msi_init(SpaprMachineState *spapr)
  33{
  34    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
  35        /* Legacy mode doesn't use this allocator */
  36        return;
  37    }
  38
  39    spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
  40    spapr->irq_map = bitmap_new(spapr->irq_map_nr);
  41}
  42
  43int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
  44                        Error **errp)
  45{
  46    int irq;
  47
  48    /*
  49     * The 'align_mask' parameter of bitmap_find_next_zero_area()
  50     * should be one less than a power of 2; 0 means no
  51     * alignment. Adapt the 'align' value of the former allocator
  52     * to fit the requirements of bitmap_find_next_zero_area()
  53     */
  54    align -= 1;
  55
  56    irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
  57                                     align);
  58    if (irq == spapr->irq_map_nr) {
  59        error_setg(errp, "can't find a free %d-IRQ block", num);
  60        return -1;
  61    }
  62
  63    bitmap_set(spapr->irq_map, irq, num);
  64
  65    return irq + SPAPR_IRQ_MSI;
  66}
  67
  68void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
  69{
  70    bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
  71}
  72
  73int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **),
  74                       SpaprInterruptController *intc,
  75                       Error **errp)
  76{
  77    MachineState *machine = MACHINE(qdev_get_machine());
  78    Error *local_err = NULL;
  79
  80    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
  81        if (fn(intc, &local_err) < 0) {
  82            if (machine_kernel_irqchip_required(machine)) {
  83                error_prepend(&local_err,
  84                              "kernel_irqchip requested but unavailable: ");
  85                error_propagate(errp, local_err);
  86                return -1;
  87            }
  88
  89            /*
  90             * We failed to initialize the KVM device, fallback to
  91             * emulated mode
  92             */
  93            error_prepend(&local_err,
  94                          "kernel_irqchip allowed but unavailable: ");
  95            error_append_hint(&local_err,
  96                              "Falling back to kernel-irqchip=off\n");
  97            warn_report_err(local_err);
  98        }
  99    }
 100
 101    return 0;
 102}
 103
 104/*
 105 * XICS IRQ backend.
 106 */
 107
 108SpaprIrq spapr_irq_xics = {
 109    .xics        = true,
 110    .xive        = false,
 111};
 112
 113/*
 114 * XIVE IRQ backend.
 115 */
 116
 117SpaprIrq spapr_irq_xive = {
 118    .xics        = false,
 119    .xive        = true,
 120};
 121
 122/*
 123 * Dual XIVE and XICS IRQ backend.
 124 *
 125 * Both interrupt mode, XIVE and XICS, objects are created but the
 126 * machine starts in legacy interrupt mode (XICS). It can be changed
 127 * by the CAS negotiation process and, in that case, the new mode is
 128 * activated after an extra machine reset.
 129 */
 130
 131/*
 132 * Define values in sync with the XIVE and XICS backend
 133 */
 134SpaprIrq spapr_irq_dual = {
 135    .xics        = true,
 136    .xive        = true,
 137};
 138
 139
 140static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
 141{
 142    MachineState *machine = MACHINE(spapr);
 143
 144    /*
 145     * Sanity checks on non-P9 machines. On these, XIVE is not
 146     * advertised, see spapr_dt_ov5_platform_support()
 147     */
 148    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
 149                               0, spapr->max_compat_pvr)) {
 150        /*
 151         * If the 'dual' interrupt mode is selected, force XICS as CAS
 152         * negotiation is useless.
 153         */
 154        if (spapr->irq == &spapr_irq_dual) {
 155            spapr->irq = &spapr_irq_xics;
 156            return 0;
 157        }
 158
 159        /*
 160         * Non-P9 machines using only XIVE is a bogus setup. We have two
 161         * scenarios to take into account because of the compat mode:
 162         *
 163         * 1. POWER7/8 machines should fail to init later on when creating
 164         *    the XIVE interrupt presenters because a POWER9 exception
 165         *    model is required.
 166
 167         * 2. POWER9 machines using the POWER8 compat mode won't fail and
 168         *    will let the OS boot with a partial XIVE setup : DT
 169         *    properties but no hcalls.
 170         *
 171         * To cover both and not confuse the OS, add an early failure in
 172         * QEMU.
 173         */
 174        if (spapr->irq == &spapr_irq_xive) {
 175            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
 176            return -1;
 177        }
 178    }
 179
 180    /*
 181     * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
 182     * re-created. Detect that early to avoid QEMU to exit later when the
 183     * guest reboots.
 184     */
 185    if (kvm_enabled() &&
 186        spapr->irq == &spapr_irq_dual &&
 187        machine_kernel_irqchip_required(machine) &&
 188        xics_kvm_has_broken_disconnect(spapr)) {
 189        error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
 190        return -1;
 191    }
 192
 193    return 0;
 194}
 195
 196/*
 197 * sPAPR IRQ frontend routines for devices
 198 */
 199#define ALL_INTCS(spapr_) \
 200    { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), }
 201
 202int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
 203                              PowerPCCPU *cpu, Error **errp)
 204{
 205    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 206    int i;
 207    int rc;
 208
 209    for (i = 0; i < ARRAY_SIZE(intcs); i++) {
 210        SpaprInterruptController *intc = intcs[i];
 211        if (intc) {
 212            SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
 213            rc = sicc->cpu_intc_create(intc, cpu, errp);
 214            if (rc < 0) {
 215                return rc;
 216            }
 217        }
 218    }
 219
 220    return 0;
 221}
 222
 223void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu)
 224{
 225    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 226    int i;
 227
 228    for (i = 0; i < ARRAY_SIZE(intcs); i++) {
 229        SpaprInterruptController *intc = intcs[i];
 230        if (intc) {
 231            SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
 232            sicc->cpu_intc_reset(intc, cpu);
 233        }
 234    }
 235}
 236
 237void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu)
 238{
 239    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 240    int i;
 241
 242    for (i = 0; i < ARRAY_SIZE(intcs); i++) {
 243        SpaprInterruptController *intc = intcs[i];
 244        if (intc) {
 245            SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
 246            sicc->cpu_intc_destroy(intc, cpu);
 247        }
 248    }
 249}
 250
 251static void spapr_set_irq(void *opaque, int irq, int level)
 252{
 253    SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
 254    SpaprInterruptControllerClass *sicc
 255        = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 256
 257    sicc->set_irq(spapr->active_intc, irq, level);
 258}
 259
 260void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon)
 261{
 262    SpaprInterruptControllerClass *sicc
 263        = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 264
 265    sicc->print_info(spapr->active_intc, mon);
 266}
 267
 268void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
 269                  void *fdt, uint32_t phandle)
 270{
 271    SpaprInterruptControllerClass *sicc
 272        = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 273
 274    sicc->dt(spapr->active_intc, nr_servers, fdt, phandle);
 275}
 276
 277uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
 278{
 279    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 280
 281    if (smc->legacy_irq_allocation) {
 282        return smc->nr_xirqs;
 283    } else {
 284        return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
 285    }
 286}
 287
 288void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
 289{
 290    MachineState *machine = MACHINE(spapr);
 291    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 292
 293    if (machine_kernel_irqchip_split(machine)) {
 294        error_setg(errp, "kernel_irqchip split mode not supported on pseries");
 295        return;
 296    }
 297
 298    if (!kvm_enabled() && machine_kernel_irqchip_required(machine)) {
 299        error_setg(errp,
 300                   "kernel_irqchip requested but only available with KVM");
 301        return;
 302    }
 303
 304    if (spapr_irq_check(spapr, errp) < 0) {
 305        return;
 306    }
 307
 308    /* Initialize the MSI IRQ allocator. */
 309    spapr_irq_msi_init(spapr);
 310
 311    if (spapr->irq->xics) {
 312        Error *local_err = NULL;
 313        Object *obj;
 314
 315        obj = object_new(TYPE_ICS_SPAPR);
 316        object_property_add_child(OBJECT(spapr), "ics", obj, &local_err);
 317        if (local_err) {
 318            error_propagate(errp, local_err);
 319            return;
 320        }
 321
 322        object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
 323                                       &local_err);
 324        if (local_err) {
 325            error_propagate(errp, local_err);
 326            return;
 327        }
 328
 329        object_property_set_int(obj, smc->nr_xirqs, "nr-irqs", &local_err);
 330        if (local_err) {
 331            error_propagate(errp, local_err);
 332            return;
 333        }
 334
 335        object_property_set_bool(obj, true, "realized", &local_err);
 336        if (local_err) {
 337            error_propagate(errp, local_err);
 338            return;
 339        }
 340
 341        spapr->ics = ICS_SPAPR(obj);
 342    }
 343
 344    if (spapr->irq->xive) {
 345        uint32_t nr_servers = spapr_max_server_number(spapr);
 346        DeviceState *dev;
 347        int i;
 348
 349        dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
 350        qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
 351        /*
 352         * 8 XIVE END structures per CPU. One for each available
 353         * priority
 354         */
 355        qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
 356        qdev_init_nofail(dev);
 357
 358        spapr->xive = SPAPR_XIVE(dev);
 359
 360        /* Enable the CPU IPIs */
 361        for (i = 0; i < nr_servers; ++i) {
 362            SpaprInterruptControllerClass *sicc
 363                = SPAPR_INTC_GET_CLASS(spapr->xive);
 364
 365            if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
 366                                false, errp) < 0) {
 367                return;
 368            }
 369        }
 370
 371        spapr_xive_hcall_init(spapr);
 372    }
 373
 374    spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
 375                                      smc->nr_xirqs + SPAPR_XIRQ_BASE);
 376
 377    /*
 378     * Mostly we don't actually need this until reset, except that not
 379     * having this set up can cause VFIO devices to issue a
 380     * false-positive warning during realize(), because they don't yet
 381     * have an in-kernel irq chip.
 382     */
 383    spapr_irq_update_active_intc(spapr);
 384}
 385
 386int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
 387{
 388    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 389    int i;
 390    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 391    int rc;
 392
 393    assert(irq >= SPAPR_XIRQ_BASE);
 394    assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
 395
 396    for (i = 0; i < ARRAY_SIZE(intcs); i++) {
 397        SpaprInterruptController *intc = intcs[i];
 398        if (intc) {
 399            SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
 400            rc = sicc->claim_irq(intc, irq, lsi, errp);
 401            if (rc < 0) {
 402                return rc;
 403            }
 404        }
 405    }
 406
 407    return 0;
 408}
 409
 410void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
 411{
 412    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
 413    int i, j;
 414    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 415
 416    assert(irq >= SPAPR_XIRQ_BASE);
 417    assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
 418
 419    for (i = irq; i < (irq + num); i++) {
 420        for (j = 0; j < ARRAY_SIZE(intcs); j++) {
 421            SpaprInterruptController *intc = intcs[j];
 422
 423            if (intc) {
 424                SpaprInterruptControllerClass *sicc
 425                    = SPAPR_INTC_GET_CLASS(intc);
 426                sicc->free_irq(intc, i);
 427            }
 428        }
 429    }
 430}
 431
 432qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
 433{
 434    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 435
 436    /*
 437     * This interface is basically for VIO and PHB devices to find the
 438     * right qemu_irq to manipulate, so we only allow access to the
 439     * external irqs for now.  Currently anything which needs to
 440     * access the IPIs most naturally gets there via the guest side
 441     * interfaces, we can change this if we need to in future.
 442     */
 443    assert(irq >= SPAPR_XIRQ_BASE);
 444    assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
 445
 446    if (spapr->ics) {
 447        assert(ics_valid_irq(spapr->ics, irq));
 448    }
 449    if (spapr->xive) {
 450        assert(irq < spapr->xive->nr_irqs);
 451        assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
 452    }
 453
 454    return spapr->qirqs[irq];
 455}
 456
 457int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
 458{
 459    SpaprInterruptControllerClass *sicc;
 460
 461    spapr_irq_update_active_intc(spapr);
 462    sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 463    return sicc->post_load(spapr->active_intc, version_id);
 464}
 465
 466void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
 467{
 468    assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
 469
 470    spapr_irq_update_active_intc(spapr);
 471}
 472
 473int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
 474{
 475    const char *nodename = "interrupt-controller";
 476    int offset, phandle;
 477
 478    offset = fdt_subnode_offset(fdt, 0, nodename);
 479    if (offset < 0) {
 480        error_setg(errp, "Can't find node \"%s\": %s",
 481                   nodename, fdt_strerror(offset));
 482        return -1;
 483    }
 484
 485    phandle = fdt_get_phandle(fdt, offset);
 486    if (!phandle) {
 487        error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
 488        return -1;
 489    }
 490
 491    return phandle;
 492}
 493
 494static void set_active_intc(SpaprMachineState *spapr,
 495                            SpaprInterruptController *new_intc)
 496{
 497    SpaprInterruptControllerClass *sicc;
 498
 499    assert(new_intc);
 500
 501    if (new_intc == spapr->active_intc) {
 502        /* Nothing to do */
 503        return;
 504    }
 505
 506    if (spapr->active_intc) {
 507        sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
 508        if (sicc->deactivate) {
 509            sicc->deactivate(spapr->active_intc);
 510        }
 511    }
 512
 513    sicc = SPAPR_INTC_GET_CLASS(new_intc);
 514    if (sicc->activate) {
 515        sicc->activate(new_intc, &error_fatal);
 516    }
 517
 518    spapr->active_intc = new_intc;
 519
 520    /*
 521     * We've changed the kernel irqchip, let VFIO devices know they
 522     * need to readjust.
 523     */
 524    kvm_irqchip_change_notify();
 525}
 526
 527void spapr_irq_update_active_intc(SpaprMachineState *spapr)
 528{
 529    SpaprInterruptController *new_intc;
 530
 531    if (!spapr->ics) {
 532        /*
 533         * XXX before we run CAS, ov5_cas is initialized empty, which
 534         * indicates XICS, even if we have ic-mode=xive.  TODO: clean
 535         * up the CAS path so that we have a clearer way of handling
 536         * this.
 537         */
 538        new_intc = SPAPR_INTC(spapr->xive);
 539    } else if (spapr->ov5_cas
 540               && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
 541        new_intc = SPAPR_INTC(spapr->xive);
 542    } else {
 543        new_intc = SPAPR_INTC(spapr->ics);
 544    }
 545
 546    set_active_intc(spapr, new_intc);
 547}
 548
 549/*
 550 * XICS legacy routines - to deprecate one day
 551 */
 552
 553static int ics_find_free_block(ICSState *ics, int num, int alignnum)
 554{
 555    int first, i;
 556
 557    for (first = 0; first < ics->nr_irqs; first += alignnum) {
 558        if (num > (ics->nr_irqs - first)) {
 559            return -1;
 560        }
 561        for (i = first; i < first + num; ++i) {
 562            if (!ics_irq_free(ics, i)) {
 563                break;
 564            }
 565        }
 566        if (i == (first + num)) {
 567            return first;
 568        }
 569    }
 570
 571    return -1;
 572}
 573
 574int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
 575{
 576    ICSState *ics = spapr->ics;
 577    int first = -1;
 578
 579    assert(ics);
 580
 581    /*
 582     * MSIMesage::data is used for storing VIRQ so
 583     * it has to be aligned to num to support multiple
 584     * MSI vectors. MSI-X is not affected by this.
 585     * The hint is used for the first IRQ, the rest should
 586     * be allocated continuously.
 587     */
 588    if (align) {
 589        assert((num == 1) || (num == 2) || (num == 4) ||
 590               (num == 8) || (num == 16) || (num == 32));
 591        first = ics_find_free_block(ics, num, num);
 592    } else {
 593        first = ics_find_free_block(ics, num, 1);
 594    }
 595
 596    if (first < 0) {
 597        error_setg(errp, "can't find a free %d-IRQ block", num);
 598        return -1;
 599    }
 600
 601    return first + ics->offset;
 602}
 603
 604SpaprIrq spapr_irq_xics_legacy = {
 605    .xics        = true,
 606    .xive        = false,
 607};
 608
 609static void spapr_irq_register_types(void)
 610{
 611    type_register_static(&spapr_intc_info);
 612}
 613
 614type_init(spapr_irq_register_types)
 615