qemu/hw/ppc/spapr_pci.c
<<
>>
Prefs
   1/*
   2 * QEMU sPAPR PCI host originated from Uninorth PCI host
   3 *
   4 * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
   5 * Copyright (C) 2011 David Gibson, IBM Corporation.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25#include "qemu/osdep.h"
  26#include "qapi/error.h"
  27#include "qemu-common.h"
  28#include "cpu.h"
  29#include "hw/hw.h"
  30#include "hw/sysbus.h"
  31#include "hw/pci/pci.h"
  32#include "hw/pci/msi.h"
  33#include "hw/pci/msix.h"
  34#include "hw/pci/pci_host.h"
  35#include "hw/ppc/spapr.h"
  36#include "hw/pci-host/spapr.h"
  37#include "exec/address-spaces.h"
  38#include "exec/ram_addr.h"
  39#include <libfdt.h>
  40#include "trace.h"
  41#include "qemu/error-report.h"
  42#include "qapi/qmp/qerror.h"
  43
  44#include "hw/pci/pci_bridge.h"
  45#include "hw/pci/pci_bus.h"
  46#include "hw/ppc/spapr_drc.h"
  47#include "sysemu/device_tree.h"
  48#include "sysemu/kvm.h"
  49#include "sysemu/hostmem.h"
  50#include "sysemu/numa.h"
  51
  52#include "hw/vfio/vfio.h"
  53
  54/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
  55#define RTAS_QUERY_FN           0
  56#define RTAS_CHANGE_FN          1
  57#define RTAS_RESET_FN           2
  58#define RTAS_CHANGE_MSI_FN      3
  59#define RTAS_CHANGE_MSIX_FN     4
  60
  61/* Interrupt types to return on RTAS_CHANGE_* */
  62#define RTAS_TYPE_MSI           1
  63#define RTAS_TYPE_MSIX          2
  64
  65#define FDT_NAME_MAX          128
  66
  67#define _FDT(exp) \
  68    do { \
  69        int ret = (exp);                                           \
  70        if (ret < 0) {                                             \
  71            return ret;                                            \
  72        }                                                          \
  73    } while (0)
  74
  75sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid)
  76{
  77    sPAPRPHBState *sphb;
  78
  79    QLIST_FOREACH(sphb, &spapr->phbs, list) {
  80        if (sphb->buid != buid) {
  81            continue;
  82        }
  83        return sphb;
  84    }
  85
  86    return NULL;
  87}
  88
  89PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
  90                              uint32_t config_addr)
  91{
  92    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
  93    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
  94    int bus_num = (config_addr >> 16) & 0xFF;
  95    int devfn = (config_addr >> 8) & 0xFF;
  96
  97    if (!phb) {
  98        return NULL;
  99    }
 100
 101    return pci_find_device(phb->bus, bus_num, devfn);
 102}
 103
 104static uint32_t rtas_pci_cfgaddr(uint32_t arg)
 105{
 106    /* This handles the encoding of extended config space addresses */
 107    return ((arg >> 20) & 0xf00) | (arg & 0xff);
 108}
 109
 110static void finish_read_pci_config(sPAPRMachineState *spapr, uint64_t buid,
 111                                   uint32_t addr, uint32_t size,
 112                                   target_ulong rets)
 113{
 114    PCIDevice *pci_dev;
 115    uint32_t val;
 116
 117    if ((size != 1) && (size != 2) && (size != 4)) {
 118        /* access must be 1, 2 or 4 bytes */
 119        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 120        return;
 121    }
 122
 123    pci_dev = spapr_pci_find_dev(spapr, buid, addr);
 124    addr = rtas_pci_cfgaddr(addr);
 125
 126    if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
 127        /* Access must be to a valid device, within bounds and
 128         * naturally aligned */
 129        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 130        return;
 131    }
 132
 133    val = pci_host_config_read_common(pci_dev, addr,
 134                                      pci_config_size(pci_dev), size);
 135
 136    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 137    rtas_st(rets, 1, val);
 138}
 139
 140static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 141                                     uint32_t token, uint32_t nargs,
 142                                     target_ulong args,
 143                                     uint32_t nret, target_ulong rets)
 144{
 145    uint64_t buid;
 146    uint32_t size, addr;
 147
 148    if ((nargs != 4) || (nret != 2)) {
 149        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 150        return;
 151    }
 152
 153    buid = rtas_ldq(args, 1);
 154    size = rtas_ld(args, 3);
 155    addr = rtas_ld(args, 0);
 156
 157    finish_read_pci_config(spapr, buid, addr, size, rets);
 158}
 159
 160static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 161                                 uint32_t token, uint32_t nargs,
 162                                 target_ulong args,
 163                                 uint32_t nret, target_ulong rets)
 164{
 165    uint32_t size, addr;
 166
 167    if ((nargs != 2) || (nret != 2)) {
 168        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 169        return;
 170    }
 171
 172    size = rtas_ld(args, 1);
 173    addr = rtas_ld(args, 0);
 174
 175    finish_read_pci_config(spapr, 0, addr, size, rets);
 176}
 177
 178static void finish_write_pci_config(sPAPRMachineState *spapr, uint64_t buid,
 179                                    uint32_t addr, uint32_t size,
 180                                    uint32_t val, target_ulong rets)
 181{
 182    PCIDevice *pci_dev;
 183
 184    if ((size != 1) && (size != 2) && (size != 4)) {
 185        /* access must be 1, 2 or 4 bytes */
 186        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 187        return;
 188    }
 189
 190    pci_dev = spapr_pci_find_dev(spapr, buid, addr);
 191    addr = rtas_pci_cfgaddr(addr);
 192
 193    if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
 194        /* Access must be to a valid device, within bounds and
 195         * naturally aligned */
 196        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 197        return;
 198    }
 199
 200    pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
 201                                 val, size);
 202
 203    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 204}
 205
 206static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 207                                      uint32_t token, uint32_t nargs,
 208                                      target_ulong args,
 209                                      uint32_t nret, target_ulong rets)
 210{
 211    uint64_t buid;
 212    uint32_t val, size, addr;
 213
 214    if ((nargs != 5) || (nret != 1)) {
 215        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 216        return;
 217    }
 218
 219    buid = rtas_ldq(args, 1);
 220    val = rtas_ld(args, 4);
 221    size = rtas_ld(args, 3);
 222    addr = rtas_ld(args, 0);
 223
 224    finish_write_pci_config(spapr, buid, addr, size, val, rets);
 225}
 226
 227static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 228                                  uint32_t token, uint32_t nargs,
 229                                  target_ulong args,
 230                                  uint32_t nret, target_ulong rets)
 231{
 232    uint32_t val, size, addr;
 233
 234    if ((nargs != 3) || (nret != 1)) {
 235        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 236        return;
 237    }
 238
 239
 240    val = rtas_ld(args, 2);
 241    size = rtas_ld(args, 1);
 242    addr = rtas_ld(args, 0);
 243
 244    finish_write_pci_config(spapr, 0, addr, size, val, rets);
 245}
 246
 247/*
 248 * Set MSI/MSIX message data.
 249 * This is required for msi_notify()/msix_notify() which
 250 * will write at the addresses via spapr_msi_write().
 251 *
 252 * If hwaddr == 0, all entries will have .data == first_irq i.e.
 253 * table will be reset.
 254 */
 255static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
 256                             unsigned first_irq, unsigned req_num)
 257{
 258    unsigned i;
 259    MSIMessage msg = { .address = addr, .data = first_irq };
 260
 261    if (!msix) {
 262        msi_set_message(pdev, msg);
 263        trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
 264        return;
 265    }
 266
 267    for (i = 0; i < req_num; ++i) {
 268        msix_set_message(pdev, i, msg);
 269        trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
 270        if (addr) {
 271            ++msg.data;
 272        }
 273    }
 274}
 275
 276static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 277                                uint32_t token, uint32_t nargs,
 278                                target_ulong args, uint32_t nret,
 279                                target_ulong rets)
 280{
 281    uint32_t config_addr = rtas_ld(args, 0);
 282    uint64_t buid = rtas_ldq(args, 1);
 283    unsigned int func = rtas_ld(args, 3);
 284    unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
 285    unsigned int seq_num = rtas_ld(args, 5);
 286    unsigned int ret_intr_type;
 287    unsigned int irq, max_irqs = 0;
 288    sPAPRPHBState *phb = NULL;
 289    PCIDevice *pdev = NULL;
 290    spapr_pci_msi *msi;
 291    int *config_addr_key;
 292    Error *err = NULL;
 293
 294    switch (func) {
 295    case RTAS_CHANGE_MSI_FN:
 296    case RTAS_CHANGE_FN:
 297        ret_intr_type = RTAS_TYPE_MSI;
 298        break;
 299    case RTAS_CHANGE_MSIX_FN:
 300        ret_intr_type = RTAS_TYPE_MSIX;
 301        break;
 302    default:
 303        error_report("rtas_ibm_change_msi(%u) is not implemented", func);
 304        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 305        return;
 306    }
 307
 308    /* Fins sPAPRPHBState */
 309    phb = spapr_pci_find_phb(spapr, buid);
 310    if (phb) {
 311        pdev = spapr_pci_find_dev(spapr, buid, config_addr);
 312    }
 313    if (!phb || !pdev) {
 314        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 315        return;
 316    }
 317
 318    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
 319
 320    /* Releasing MSIs */
 321    if (!req_num) {
 322        if (!msi) {
 323            trace_spapr_pci_msi("Releasing wrong config", config_addr);
 324            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 325            return;
 326        }
 327
 328        xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
 329        if (msi_present(pdev)) {
 330            spapr_msi_setmsg(pdev, 0, false, 0, 0);
 331        }
 332        if (msix_present(pdev)) {
 333            spapr_msi_setmsg(pdev, 0, true, 0, 0);
 334        }
 335        g_hash_table_remove(phb->msi, &config_addr);
 336
 337        trace_spapr_pci_msi("Released MSIs", config_addr);
 338        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 339        rtas_st(rets, 1, 0);
 340        return;
 341    }
 342
 343    /* Enabling MSI */
 344
 345    /* Check if the device supports as many IRQs as requested */
 346    if (ret_intr_type == RTAS_TYPE_MSI) {
 347        max_irqs = msi_nr_vectors_allocated(pdev);
 348    } else if (ret_intr_type == RTAS_TYPE_MSIX) {
 349        max_irqs = pdev->msix_entries_nr;
 350    }
 351    if (!max_irqs) {
 352        error_report("Requested interrupt type %d is not enabled for device %x",
 353                     ret_intr_type, config_addr);
 354        rtas_st(rets, 0, -1); /* Hardware error */
 355        return;
 356    }
 357    /* Correct the number if the guest asked for too many */
 358    if (req_num > max_irqs) {
 359        trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
 360        req_num = max_irqs;
 361        irq = 0; /* to avoid misleading trace */
 362        goto out;
 363    }
 364
 365    /* Allocate MSIs */
 366    irq = xics_spapr_alloc_block(spapr->xics, req_num, false,
 367                           ret_intr_type == RTAS_TYPE_MSI, &err);
 368    if (err) {
 369        error_reportf_err(err, "Can't allocate MSIs for device %x: ",
 370                          config_addr);
 371        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 372        return;
 373    }
 374
 375    /* Release previous MSIs */
 376    if (msi) {
 377        xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
 378        g_hash_table_remove(phb->msi, &config_addr);
 379    }
 380
 381    /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
 382    spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
 383                     irq, req_num);
 384
 385    /* Add MSI device to cache */
 386    msi = g_new(spapr_pci_msi, 1);
 387    msi->first_irq = irq;
 388    msi->num = req_num;
 389    config_addr_key = g_new(int, 1);
 390    *config_addr_key = config_addr;
 391    g_hash_table_insert(phb->msi, config_addr_key, msi);
 392
 393out:
 394    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 395    rtas_st(rets, 1, req_num);
 396    rtas_st(rets, 2, ++seq_num);
 397    if (nret > 3) {
 398        rtas_st(rets, 3, ret_intr_type);
 399    }
 400
 401    trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
 402}
 403
 404static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
 405                                                   sPAPRMachineState *spapr,
 406                                                   uint32_t token,
 407                                                   uint32_t nargs,
 408                                                   target_ulong args,
 409                                                   uint32_t nret,
 410                                                   target_ulong rets)
 411{
 412    uint32_t config_addr = rtas_ld(args, 0);
 413    uint64_t buid = rtas_ldq(args, 1);
 414    unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
 415    sPAPRPHBState *phb = NULL;
 416    PCIDevice *pdev = NULL;
 417    spapr_pci_msi *msi;
 418
 419    /* Find sPAPRPHBState */
 420    phb = spapr_pci_find_phb(spapr, buid);
 421    if (phb) {
 422        pdev = spapr_pci_find_dev(spapr, buid, config_addr);
 423    }
 424    if (!phb || !pdev) {
 425        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 426        return;
 427    }
 428
 429    /* Find device descriptor and start IRQ */
 430    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
 431    if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
 432        trace_spapr_pci_msi("Failed to return vector", config_addr);
 433        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 434        return;
 435    }
 436    intr_src_num = msi->first_irq + ioa_intr_num;
 437    trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
 438                                                           intr_src_num);
 439
 440    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 441    rtas_st(rets, 1, intr_src_num);
 442    rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
 443}
 444
 445static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
 446                                    sPAPRMachineState *spapr,
 447                                    uint32_t token, uint32_t nargs,
 448                                    target_ulong args, uint32_t nret,
 449                                    target_ulong rets)
 450{
 451    sPAPRPHBState *sphb;
 452    uint32_t addr, option;
 453    uint64_t buid;
 454    int ret;
 455
 456    if ((nargs != 4) || (nret != 1)) {
 457        goto param_error_exit;
 458    }
 459
 460    buid = rtas_ldq(args, 1);
 461    addr = rtas_ld(args, 0);
 462    option = rtas_ld(args, 3);
 463
 464    sphb = spapr_pci_find_phb(spapr, buid);
 465    if (!sphb) {
 466        goto param_error_exit;
 467    }
 468
 469    if (!spapr_phb_eeh_available(sphb)) {
 470        goto param_error_exit;
 471    }
 472
 473    ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option);
 474    rtas_st(rets, 0, ret);
 475    return;
 476
 477param_error_exit:
 478    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 479}
 480
 481static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
 482                                           sPAPRMachineState *spapr,
 483                                           uint32_t token, uint32_t nargs,
 484                                           target_ulong args, uint32_t nret,
 485                                           target_ulong rets)
 486{
 487    sPAPRPHBState *sphb;
 488    PCIDevice *pdev;
 489    uint32_t addr, option;
 490    uint64_t buid;
 491
 492    if ((nargs != 4) || (nret != 2)) {
 493        goto param_error_exit;
 494    }
 495
 496    buid = rtas_ldq(args, 1);
 497    sphb = spapr_pci_find_phb(spapr, buid);
 498    if (!sphb) {
 499        goto param_error_exit;
 500    }
 501
 502    if (!spapr_phb_eeh_available(sphb)) {
 503        goto param_error_exit;
 504    }
 505
 506    /*
 507     * We always have PE address of form "00BB0001". "BB"
 508     * represents the bus number of PE's primary bus.
 509     */
 510    option = rtas_ld(args, 3);
 511    switch (option) {
 512    case RTAS_GET_PE_ADDR:
 513        addr = rtas_ld(args, 0);
 514        pdev = spapr_pci_find_dev(spapr, buid, addr);
 515        if (!pdev) {
 516            goto param_error_exit;
 517        }
 518
 519        rtas_st(rets, 1, (pci_bus_num(pdev->bus) << 16) + 1);
 520        break;
 521    case RTAS_GET_PE_MODE:
 522        rtas_st(rets, 1, RTAS_PE_MODE_SHARED);
 523        break;
 524    default:
 525        goto param_error_exit;
 526    }
 527
 528    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 529    return;
 530
 531param_error_exit:
 532    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 533}
 534
 535static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
 536                                            sPAPRMachineState *spapr,
 537                                            uint32_t token, uint32_t nargs,
 538                                            target_ulong args, uint32_t nret,
 539                                            target_ulong rets)
 540{
 541    sPAPRPHBState *sphb;
 542    uint64_t buid;
 543    int state, ret;
 544
 545    if ((nargs != 3) || (nret != 4 && nret != 5)) {
 546        goto param_error_exit;
 547    }
 548
 549    buid = rtas_ldq(args, 1);
 550    sphb = spapr_pci_find_phb(spapr, buid);
 551    if (!sphb) {
 552        goto param_error_exit;
 553    }
 554
 555    if (!spapr_phb_eeh_available(sphb)) {
 556        goto param_error_exit;
 557    }
 558
 559    ret = spapr_phb_vfio_eeh_get_state(sphb, &state);
 560    rtas_st(rets, 0, ret);
 561    if (ret != RTAS_OUT_SUCCESS) {
 562        return;
 563    }
 564
 565    rtas_st(rets, 1, state);
 566    rtas_st(rets, 2, RTAS_EEH_SUPPORT);
 567    rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO);
 568    if (nret >= 5) {
 569        rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO);
 570    }
 571    return;
 572
 573param_error_exit:
 574    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 575}
 576
 577static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
 578                                    sPAPRMachineState *spapr,
 579                                    uint32_t token, uint32_t nargs,
 580                                    target_ulong args, uint32_t nret,
 581                                    target_ulong rets)
 582{
 583    sPAPRPHBState *sphb;
 584    uint32_t option;
 585    uint64_t buid;
 586    int ret;
 587
 588    if ((nargs != 4) || (nret != 1)) {
 589        goto param_error_exit;
 590    }
 591
 592    buid = rtas_ldq(args, 1);
 593    option = rtas_ld(args, 3);
 594    sphb = spapr_pci_find_phb(spapr, buid);
 595    if (!sphb) {
 596        goto param_error_exit;
 597    }
 598
 599    if (!spapr_phb_eeh_available(sphb)) {
 600        goto param_error_exit;
 601    }
 602
 603    ret = spapr_phb_vfio_eeh_reset(sphb, option);
 604    rtas_st(rets, 0, ret);
 605    return;
 606
 607param_error_exit:
 608    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 609}
 610
 611static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
 612                                  sPAPRMachineState *spapr,
 613                                  uint32_t token, uint32_t nargs,
 614                                  target_ulong args, uint32_t nret,
 615                                  target_ulong rets)
 616{
 617    sPAPRPHBState *sphb;
 618    uint64_t buid;
 619    int ret;
 620
 621    if ((nargs != 3) || (nret != 1)) {
 622        goto param_error_exit;
 623    }
 624
 625    buid = rtas_ldq(args, 1);
 626    sphb = spapr_pci_find_phb(spapr, buid);
 627    if (!sphb) {
 628        goto param_error_exit;
 629    }
 630
 631    if (!spapr_phb_eeh_available(sphb)) {
 632        goto param_error_exit;
 633    }
 634
 635    ret = spapr_phb_vfio_eeh_configure(sphb);
 636    rtas_st(rets, 0, ret);
 637    return;
 638
 639param_error_exit:
 640    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 641}
 642
 643/* To support it later */
 644static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
 645                                       sPAPRMachineState *spapr,
 646                                       uint32_t token, uint32_t nargs,
 647                                       target_ulong args, uint32_t nret,
 648                                       target_ulong rets)
 649{
 650    sPAPRPHBState *sphb;
 651    int option;
 652    uint64_t buid;
 653
 654    if ((nargs != 8) || (nret != 1)) {
 655        goto param_error_exit;
 656    }
 657
 658    buid = rtas_ldq(args, 1);
 659    sphb = spapr_pci_find_phb(spapr, buid);
 660    if (!sphb) {
 661        goto param_error_exit;
 662    }
 663
 664    if (!spapr_phb_eeh_available(sphb)) {
 665        goto param_error_exit;
 666    }
 667
 668    option = rtas_ld(args, 7);
 669    switch (option) {
 670    case RTAS_SLOT_TEMP_ERR_LOG:
 671    case RTAS_SLOT_PERM_ERR_LOG:
 672        break;
 673    default:
 674        goto param_error_exit;
 675    }
 676
 677    /* We don't have error log yet */
 678    rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
 679    return;
 680
 681param_error_exit:
 682    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 683}
 684
 685static int pci_spapr_swizzle(int slot, int pin)
 686{
 687    return (slot + pin) % PCI_NUM_PINS;
 688}
 689
 690static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
 691{
 692    /*
 693     * Here we need to convert pci_dev + irq_num to some unique value
 694     * which is less than number of IRQs on the specific bus (4).  We
 695     * use standard PCI swizzling, that is (slot number + pin number)
 696     * % 4.
 697     */
 698    return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
 699}
 700
 701static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
 702{
 703    /*
 704     * Here we use the number returned by pci_spapr_map_irq to find a
 705     * corresponding qemu_irq.
 706     */
 707    sPAPRPHBState *phb = opaque;
 708
 709    trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
 710    qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
 711}
 712
 713static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
 714{
 715    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
 716    PCIINTxRoute route;
 717
 718    route.mode = PCI_INTX_ENABLED;
 719    route.irq = sphb->lsi_table[pin].irq;
 720
 721    return route;
 722}
 723
 724/*
 725 * MSI/MSIX memory region implementation.
 726 * The handler handles both MSI and MSIX.
 727 * For MSI-X, the vector number is encoded as a part of the address,
 728 * data is set to 0.
 729 * For MSI, the vector number is encoded in least bits in data.
 730 */
 731static void spapr_msi_write(void *opaque, hwaddr addr,
 732                            uint64_t data, unsigned size)
 733{
 734    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 735    uint32_t irq = data;
 736
 737    trace_spapr_pci_msi_write(addr, data, irq);
 738
 739    qemu_irq_pulse(xics_get_qirq(spapr->xics, irq));
 740}
 741
 742static const MemoryRegionOps spapr_msi_ops = {
 743    /* There is no .read as the read result is undefined by PCI spec */
 744    .read = NULL,
 745    .write = spapr_msi_write,
 746    .endianness = DEVICE_LITTLE_ENDIAN
 747};
 748
 749/*
 750 * PHB PCI device
 751 */
 752static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 753{
 754    sPAPRPHBState *phb = opaque;
 755
 756    return &phb->iommu_as;
 757}
 758
 759static char *spapr_phb_vfio_get_loc_code(sPAPRPHBState *sphb,  PCIDevice *pdev)
 760{
 761    char *path = NULL, *buf = NULL, *host = NULL;
 762
 763    /* Get the PCI VFIO host id */
 764    host = object_property_get_str(OBJECT(pdev), "host", NULL);
 765    if (!host) {
 766        goto err_out;
 767    }
 768
 769    /* Construct the path of the file that will give us the DT location */
 770    path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host);
 771    g_free(host);
 772    if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) {
 773        goto err_out;
 774    }
 775    g_free(path);
 776
 777    /* Construct and read from host device tree the loc-code */
 778    path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf);
 779    g_free(buf);
 780    if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) {
 781        goto err_out;
 782    }
 783    return buf;
 784
 785err_out:
 786    g_free(path);
 787    return NULL;
 788}
 789
 790static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev)
 791{
 792    char *buf;
 793    const char *devtype = "qemu";
 794    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
 795
 796    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
 797        buf = spapr_phb_vfio_get_loc_code(sphb, pdev);
 798        if (buf) {
 799            return buf;
 800        }
 801        devtype = "vfio";
 802    }
 803    /*
 804     * For emulated devices and VFIO-failure case, make up
 805     * the loc-code.
 806     */
 807    buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x",
 808                          devtype, pdev->name, sphb->index, busnr,
 809                          PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 810    return buf;
 811}
 812
 813/* Macros to operate with address in OF binding to PCI */
 814#define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
 815#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
 816#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
 817#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
 818#define b_ss(x)         b_x((x), 24, 2) /* the space code */
 819#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
 820#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
 821#define b_fff(x)        b_x((x), 8, 3)  /* function number */
 822#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
 823
 824/* for 'reg'/'assigned-addresses' OF properties */
 825#define RESOURCE_CELLS_SIZE 2
 826#define RESOURCE_CELLS_ADDRESS 3
 827
 828typedef struct ResourceFields {
 829    uint32_t phys_hi;
 830    uint32_t phys_mid;
 831    uint32_t phys_lo;
 832    uint32_t size_hi;
 833    uint32_t size_lo;
 834} QEMU_PACKED ResourceFields;
 835
 836typedef struct ResourceProps {
 837    ResourceFields reg[8];
 838    ResourceFields assigned[7];
 839    uint32_t reg_len;
 840    uint32_t assigned_len;
 841} ResourceProps;
 842
/* fill in the 'reg'/'assigned-addresses' OF properties for
 844 * a PCI device. 'reg' describes resource requirements for a
 845 * device's IO/MEM regions, 'assigned-addresses' describes the
 846 * actual resource assignments.
 847 *
 848 * the properties are arrays of ('phys-addr', 'size') pairs describing
 849 * the addressable regions of the PCI device, where 'phys-addr' is a
 850 * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to
 851 * (phys.hi, phys.mid, phys.lo), and 'size' is a
 852 * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo).
 853 *
 854 * phys.hi = 0xYYXXXXZZ, where:
 855 *   0xYY = npt000ss
 856 *          |||   |
 857 *          |||   +-- space code
 858 *          |||               |
 859 *          |||               +  00 if configuration space
 860 *          |||               +  01 if IO region,
 861 *          |||               +  10 if 32-bit MEM region
 862 *          |||               +  11 if 64-bit MEM region
 863 *          |||
 864 *          ||+------ for non-relocatable IO: 1 if aliased
 865 *          ||        for relocatable IO: 1 if below 64KB
 866 *          ||        for MEM: 1 if below 1MB
 867 *          |+------- 1 if region is prefetchable
 868 *          +-------- 1 if region is non-relocatable
 869 *   0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function
 870 *            bits respectively
 871 *   0xZZ = rrrrrrrr, the register number of the BAR corresponding
 872 *          to the region
 873 *
 874 * phys.mid and phys.lo correspond respectively to the hi/lo portions
 875 * of the actual address of the region.
 876 *
 877 * how the phys-addr/size values are used differ slightly between
 878 * 'reg' and 'assigned-addresses' properties. namely, 'reg' has
 879 * an additional description for the config space region of the
 880 * device, and in the case of QEMU has n=0 and phys.mid=phys.lo=0
 881 * to describe the region as relocatable, with an address-mapping
 882 * that corresponds directly to the PHB's address space for the
 883 * resource. 'assigned-addresses' always has n=1 set with an absolute
 884 * address assigned for the resource. in general, 'assigned-addresses'
 885 * won't be populated, since addresses for PCI devices are generally
 886 * unmapped initially and left to the guest to assign.
 887 *
 888 * note also that addresses defined in these properties are, at least
 889 * for PAPR guests, relative to the PHBs IO/MEM windows, and
 890 * correspond directly to the addresses in the BARs.
 891 *
 892 * in accordance with PCI Bus Binding to Open Firmware,
 893 * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7,
 894 * Appendix C.
 895 */
 896static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
 897{
 898    int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d))));
 899    uint32_t dev_id = (b_bbbbbbbb(bus_num) |
 900                       b_ddddd(PCI_SLOT(d->devfn)) |
 901                       b_fff(PCI_FUNC(d->devfn)));
 902    ResourceFields *reg, *assigned;
 903    int i, reg_idx = 0, assigned_idx = 0;
 904
 905    /* config space region */
 906    reg = &rp->reg[reg_idx++];
 907    reg->phys_hi = cpu_to_be32(dev_id);
 908    reg->phys_mid = 0;
 909    reg->phys_lo = 0;
 910    reg->size_hi = 0;
 911    reg->size_lo = 0;
 912
 913    for (i = 0; i < PCI_NUM_REGIONS; i++) {
 914        if (!d->io_regions[i].size) {
 915            continue;
 916        }
 917
 918        reg = &rp->reg[reg_idx++];
 919
 920        reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i)));
 921        if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
 922            reg->phys_hi |= cpu_to_be32(b_ss(1));
 923        } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
 924            reg->phys_hi |= cpu_to_be32(b_ss(3));
 925        } else {
 926            reg->phys_hi |= cpu_to_be32(b_ss(2));
 927        }
 928        reg->phys_mid = 0;
 929        reg->phys_lo = 0;
 930        reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32);
 931        reg->size_lo = cpu_to_be32(d->io_regions[i].size);
 932
 933        if (d->io_regions[i].addr == PCI_BAR_UNMAPPED) {
 934            continue;
 935        }
 936
 937        assigned = &rp->assigned[assigned_idx++];
 938        assigned->phys_hi = cpu_to_be32(reg->phys_hi | b_n(1));
 939        assigned->phys_mid = cpu_to_be32(d->io_regions[i].addr >> 32);
 940        assigned->phys_lo = cpu_to_be32(d->io_regions[i].addr);
 941        assigned->size_hi = reg->size_hi;
 942        assigned->size_lo = reg->size_lo;
 943    }
 944
 945    rp->reg_len = reg_idx * sizeof(ResourceFields);
 946    rp->assigned_len = assigned_idx * sizeof(ResourceFields);
 947}
 948
/* Defined further down; needed here to fill in "ibm,my-drc-index". */
static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
                                            PCIDevice *pdev);

/*
 * Populate the device tree node at @offset in @fdt with the properties
 * describing PCI device @dev, which sits below host bridge @sphb.
 *
 * Most properties are copied from the device's config space registers.
 *
 * Returns 0 on success, -1 if no location code could be obtained for the
 * device, or the negative libfdt error from setting "ibm,loc-code".
 * Failures of the calls wrapped in _FDT() are handled by that macro
 * (defined elsewhere).
 */
static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
                                       sPAPRPHBState *sphb)
{
    ResourceProps rp;
    bool is_bridge = false;
    int pci_status, err;
    char *buf = NULL;
    uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
    uint32_t max_msi, max_msix;

    if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
        PCI_HEADER_TYPE_BRIDGE) {
        is_bridge = true;
    }

    /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */
    _FDT(fdt_setprop_cell(fdt, offset, "vendor-id",
                          pci_default_read_config(dev, PCI_VENDOR_ID, 2)));
    _FDT(fdt_setprop_cell(fdt, offset, "device-id",
                          pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
    _FDT(fdt_setprop_cell(fdt, offset, "revision-id",
                          pci_default_read_config(dev, PCI_REVISION_ID, 1)));
    _FDT(fdt_setprop_cell(fdt, offset, "class-code",
                          pci_default_read_config(dev, PCI_CLASS_PROG, 3)));
    /* "interrupts" is only emitted when the interrupt pin register is set */
    if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
        _FDT(fdt_setprop_cell(fdt, offset, "interrupts",
                 pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
    }

    /* min-grant / max-latency are skipped for bridge headers */
    if (!is_bridge) {
        _FDT(fdt_setprop_cell(fdt, offset, "min-grant",
            pci_default_read_config(dev, PCI_MIN_GNT, 1)));
        _FDT(fdt_setprop_cell(fdt, offset, "max-latency",
            pci_default_read_config(dev, PCI_MAX_LAT, 1)));
    }

    /* subsystem IDs are only emitted when non-zero */
    if (pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)) {
        _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id",
                 pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)));
    }

    if (pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)) {
        _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id",
                 pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)));
    }

    _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size",
        pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1)));

    /* the following fdt cells are masked off the pci status register */
    pci_status = pci_default_read_config(dev, PCI_STATUS, 2);
    /* NOTE(review): the masked value is stored unshifted - confirm intended */
    _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed",
                          PCI_STATUS_DEVSEL_MASK & pci_status));

    /* boolean capabilities are encoded as empty (zero-length) properties */
    if (pci_status & PCI_STATUS_FAST_BACK) {
        _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0));
    }
    if (pci_status & PCI_STATUS_66MHZ) {
        _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0));
    }
    if (pci_status & PCI_STATUS_UDF) {
        _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
    }

    /* NOTE: this is normally generated by firmware via path/unit name,
     * but in our case we must set it manually since it does not get
     * processed by OF beforehand
     */
    _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
    buf = spapr_phb_get_loc_code(sphb, dev);
    if (!buf) {
        error_report("Failed setting the ibm,loc-code");
        return -1;
    }

    /* buf is released here, whether or not the property was set */
    err = fdt_setprop_string(fdt, offset, "ibm,loc-code", buf);
    g_free(buf);
    if (err < 0) {
        return err;
    }

    /* a DRC index of 0 means no DR connector was found for the device */
    if (drc_index) {
        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
    }

    _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
                          RESOURCE_CELLS_ADDRESS));
    _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
                          RESOURCE_CELLS_SIZE));

    /* MSI / MSI-X vector counts are only advertised when non-zero */
    max_msi = msi_nr_vectors_allocated(dev);
    if (max_msi) {
        _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi));
    }
    max_msix = dev->msix_entries_nr;
    if (max_msix) {
        _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix));
    }

    /* "reg" and "assigned-addresses" are emitted as raw byte blobs */
    populate_resource_props(dev, &rp);
    _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len));
    _FDT(fdt_setprop(fdt, offset, "assigned-addresses",
                     (uint8_t *)rp.assigned, rp.assigned_len));

    return 0;
}
1058
1059/* create OF node for pci device and required OF DT properties */
1060static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
1061                                     void *fdt, int node_offset)
1062{
1063    int offset, ret;
1064    int slot = PCI_SLOT(dev->devfn);
1065    int func = PCI_FUNC(dev->devfn);
1066    char nodename[FDT_NAME_MAX];
1067
1068    if (func != 0) {
1069        snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func);
1070    } else {
1071        snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot);
1072    }
1073    offset = fdt_add_subnode(fdt, node_offset, nodename);
1074    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb);
1075
1076    g_assert(!ret);
1077    if (ret) {
1078        return 0;
1079    }
1080    return offset;
1081}
1082
1083static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
1084                                     sPAPRPHBState *phb,
1085                                     PCIDevice *pdev,
1086                                     Error **errp)
1087{
1088    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
1089    DeviceState *dev = DEVICE(pdev);
1090    void *fdt = NULL;
1091    int fdt_start_offset = 0, fdt_size;
1092
1093    fdt = create_device_tree(&fdt_size);
1094    fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
1095    if (!fdt_start_offset) {
1096        error_setg(errp, "Failed to create pci child device tree node");
1097        goto out;
1098    }
1099
1100    drck->attach(drc, DEVICE(pdev),
1101                 fdt, fdt_start_offset, !dev->hotplugged, errp);
1102out:
1103    if (*errp) {
1104        g_free(fdt);
1105    }
1106}
1107
1108static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque)
1109{
1110    /* some version guests do not wait for completion of a device
1111     * cleanup (generally done asynchronously by the kernel) before
1112     * signaling to QEMU that the device is safe, but instead sleep
1113     * for some 'safe' period of time. unfortunately on a busy host
1114     * this sleep isn't guaranteed to be long enough, resulting in
1115     * bad things like IRQ lines being left asserted during final
1116     * device removal. to deal with this we call reset just prior
1117     * to finalizing the device, which will put the device back into
1118     * an 'idle' state, as the device cleanup code expects.
1119     */
1120    pci_device_reset(PCI_DEVICE(dev));
1121    object_unparent(OBJECT(dev));
1122}
1123
1124static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
1125                                        sPAPRPHBState *phb,
1126                                        PCIDevice *pdev,
1127                                        Error **errp)
1128{
1129    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
1130
1131    drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
1132}
1133
1134static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb,
1135                                                    uint32_t busnr,
1136                                                    int32_t devfn)
1137{
1138    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
1139                                    (phb->index << 16) |
1140                                    (busnr << 8) |
1141                                    devfn);
1142}
1143
1144static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
1145                                               PCIDevice *pdev)
1146{
1147    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
1148    return spapr_phb_get_pci_func_drc(phb, busnr, pdev->devfn);
1149}
1150
1151static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
1152                                            PCIDevice *pdev)
1153{
1154    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
1155    sPAPRDRConnectorClass *drck;
1156
1157    if (!drc) {
1158        return 0;
1159    }
1160
1161    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
1162    return drck->get_index(drc);
1163}
1164
1165static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
1166                                     DeviceState *plugged_dev, Error **errp)
1167{
1168    sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1169    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1170    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
1171    Error *local_err = NULL;
1172    PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
1173    uint32_t slotnr = PCI_SLOT(pdev->devfn);
1174
1175    /* if DR is disabled we don't need to do anything in the case of
1176     * hotplug or coldplug callbacks
1177     */
1178    if (!phb->dr_enabled) {
1179        /* if this is a hotplug operation initiated by the user
1180         * we need to let them know it's not enabled
1181         */
1182        if (plugged_dev->hotplugged) {
1183            error_setg(errp, QERR_BUS_NO_HOTPLUG,
1184                       object_get_typename(OBJECT(phb)));
1185        }
1186        return;
1187    }
1188
1189    g_assert(drc);
1190
1191    /* Following the QEMU convention used for PCIe multifunction
1192     * hotplug, we do not allow functions to be hotplugged to a
1193     * slot that already has function 0 present
1194     */
1195    if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
1196        PCI_FUNC(pdev->devfn) != 0) {
1197        error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
1198                   " additional functions can no longer be exposed to guest.",
1199                   slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name);
1200        return;
1201    }
1202
1203    spapr_phb_add_pci_device(drc, phb, pdev, &local_err);
1204    if (local_err) {
1205        error_propagate(errp, local_err);
1206        return;
1207    }
1208
1209    /* If this is function 0, signal hotplug for all the device functions.
1210     * Otherwise defer sending the hotplug event.
1211     */
1212    if (plugged_dev->hotplugged && PCI_FUNC(pdev->devfn) == 0) {
1213        int i;
1214
1215        for (i = 0; i < 8; i++) {
1216            sPAPRDRConnector *func_drc;
1217            sPAPRDRConnectorClass *func_drck;
1218            sPAPRDREntitySense state;
1219
1220            func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
1221                                                  PCI_DEVFN(slotnr, i));
1222            func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1223            func_drck->entity_sense(func_drc, &state);
1224
1225            if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
1226                spapr_hotplug_req_add_by_index(func_drc);
1227            }
1228        }
1229    }
1230}
1231
1232static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler,
1233                                       DeviceState *plugged_dev, Error **errp)
1234{
1235    sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1236    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1237    sPAPRDRConnectorClass *drck;
1238    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
1239    Error *local_err = NULL;
1240
1241    if (!phb->dr_enabled) {
1242        error_setg(errp, QERR_BUS_NO_HOTPLUG,
1243                   object_get_typename(OBJECT(phb)));
1244        return;
1245    }
1246
1247    g_assert(drc);
1248
1249    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
1250    if (!drck->release_pending(drc)) {
1251        PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
1252        uint32_t slotnr = PCI_SLOT(pdev->devfn);
1253        sPAPRDRConnector *func_drc;
1254        sPAPRDRConnectorClass *func_drck;
1255        sPAPRDREntitySense state;
1256        int i;
1257
1258        /* ensure any other present functions are pending unplug */
1259        if (PCI_FUNC(pdev->devfn) == 0) {
1260            for (i = 1; i < 8; i++) {
1261                func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
1262                                                      PCI_DEVFN(slotnr, i));
1263                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1264                func_drck->entity_sense(func_drc, &state);
1265                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
1266                    && !func_drck->release_pending(func_drc)) {
1267                    error_setg(errp,
1268                               "PCI: slot %d, function %d still present. "
1269                               "Must unplug all non-0 functions first.",
1270                               slotnr, i);
1271                    return;
1272                }
1273            }
1274        }
1275
1276        spapr_phb_remove_pci_device(drc, phb, pdev, &local_err);
1277        if (local_err) {
1278            error_propagate(errp, local_err);
1279            return;
1280        }
1281
1282        /* if this isn't func 0, defer unplug event. otherwise signal removal
1283         * for all present functions
1284         */
1285        if (PCI_FUNC(pdev->devfn) == 0) {
1286            for (i = 7; i >= 0; i--) {
1287                func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
1288                                                      PCI_DEVFN(slotnr, i));
1289                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1290                func_drck->entity_sense(func_drc, &state);
1291                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
1292                    spapr_hotplug_req_remove_by_index(func_drc);
1293                }
1294            }
1295        }
1296    }
1297}
1298
1299static void spapr_phb_realize(DeviceState *dev, Error **errp)
1300{
1301    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1302    SysBusDevice *s = SYS_BUS_DEVICE(dev);
1303    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
1304    PCIHostState *phb = PCI_HOST_BRIDGE(s);
1305    char *namebuf;
1306    int i;
1307    PCIBus *bus;
1308    uint64_t msi_window_size = 4096;
1309    sPAPRTCETable *tcet;
1310    const unsigned windows_supported =
1311        sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
1312
1313    if (sphb->index != (uint32_t)-1) {
1314        sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
1315        Error *local_err = NULL;
1316
1317        if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != (uint32_t)-1)
1318            || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
1319            || (sphb->mem_win_addr != (hwaddr)-1)
1320            || (sphb->mem64_win_addr != (hwaddr)-1)
1321            || (sphb->io_win_addr != (hwaddr)-1)) {
1322            error_setg(errp, "Either \"index\" or other parameters must"
1323                       " be specified for PAPR PHB, not both");
1324            return;
1325        }
1326
1327        smc->phb_placement(spapr, sphb->index,
1328                           &sphb->buid, &sphb->io_win_addr,
1329                           &sphb->mem_win_addr, &sphb->mem64_win_addr,
1330                           windows_supported, sphb->dma_liobn, &local_err);
1331        if (local_err) {
1332            error_propagate(errp, local_err);
1333            return;
1334        }
1335    }
1336
1337    if (sphb->buid == (uint64_t)-1) {
1338        error_setg(errp, "BUID not specified for PHB");
1339        return;
1340    }
1341
1342    if ((sphb->dma_liobn[0] == (uint32_t)-1) ||
1343        ((sphb->dma_liobn[1] == (uint32_t)-1) && (windows_supported > 1))) {
1344        error_setg(errp, "LIOBN(s) not specified for PHB");
1345        return;
1346    }
1347
1348    if (sphb->mem_win_addr == (hwaddr)-1) {
1349        error_setg(errp, "Memory window address not specified for PHB");
1350        return;
1351    }
1352
1353    if (sphb->io_win_addr == (hwaddr)-1) {
1354        error_setg(errp, "IO window address not specified for PHB");
1355        return;
1356    }
1357
1358    if (sphb->mem64_win_size != 0) {
1359        if (sphb->mem64_win_addr == (hwaddr)-1) {
1360            error_setg(errp,
1361                       "64-bit memory window address not specified for PHB");
1362            return;
1363        }
1364
1365        if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
1366            error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx
1367                       " (max 2 GiB)", sphb->mem_win_size);
1368            return;
1369        }
1370
1371        if (sphb->mem64_win_pciaddr == (hwaddr)-1) {
1372            /* 64-bit window defaults to identity mapping */
1373            sphb->mem64_win_pciaddr = sphb->mem64_win_addr;
1374        }
1375    } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
1376        /*
1377         * For compatibility with old configuration, if no 64-bit MMIO
1378         * window is specified, but the ordinary (32-bit) memory
1379         * window is specified as > 2GiB, we treat it as a 2GiB 32-bit
1380         * window, with a 64-bit MMIO window following on immediately
1381         * afterwards
1382         */
1383        sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE;
1384        sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE;
1385        sphb->mem64_win_pciaddr =
1386            SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE;
1387        sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE;
1388    }
1389
1390    if (spapr_pci_find_phb(spapr, sphb->buid)) {
1391        error_setg(errp, "PCI host bridges must have unique BUIDs");
1392        return;
1393    }
1394
1395    if (sphb->numa_node != -1 &&
1396        (sphb->numa_node >= MAX_NODES || !numa_info[sphb->numa_node].present)) {
1397        error_setg(errp, "Invalid NUMA node ID for PCI host bridge");
1398        return;
1399    }
1400
1401    sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
1402
1403    namebuf = alloca(strlen(sphb->dtbusname) + 32);
1404
1405    /* Initialize memory regions */
1406    sprintf(namebuf, "%s.mmio", sphb->dtbusname);
1407    memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
1408
1409    sprintf(namebuf, "%s.mmio32-alias", sphb->dtbusname);
1410    memory_region_init_alias(&sphb->mem32window, OBJECT(sphb),
1411                             namebuf, &sphb->memspace,
1412                             SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
1413    memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
1414                                &sphb->mem32window);
1415
1416    sprintf(namebuf, "%s.mmio64-alias", sphb->dtbusname);
1417    memory_region_init_alias(&sphb->mem64window, OBJECT(sphb),
1418                             namebuf, &sphb->memspace,
1419                             sphb->mem64_win_pciaddr, sphb->mem64_win_size);
1420    memory_region_add_subregion(get_system_memory(), sphb->mem64_win_addr,
1421                                &sphb->mem64window);
1422
1423    /* Initialize IO regions */
1424    sprintf(namebuf, "%s.io", sphb->dtbusname);
1425    memory_region_init(&sphb->iospace, OBJECT(sphb),
1426                       namebuf, SPAPR_PCI_IO_WIN_SIZE);
1427
1428    sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
1429    memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
1430                             &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
1431    memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
1432                                &sphb->iowindow);
1433
1434    bus = pci_register_bus(dev, NULL,
1435                           pci_spapr_set_irq, pci_spapr_map_irq, sphb,
1436                           &sphb->memspace, &sphb->iospace,
1437                           PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
1438    phb->bus = bus;
1439    qbus_set_hotplug_handler(BUS(phb->bus), DEVICE(sphb), NULL);
1440
1441    /*
1442     * Initialize PHB address space.
1443     * By default there will be at least one subregion for default
1444     * 32bit DMA window.
1445     * Later the guest might want to create another DMA window
1446     * which will become another memory subregion.
1447     */
1448    sprintf(namebuf, "%s.iommu-root", sphb->dtbusname);
1449
1450    memory_region_init(&sphb->iommu_root, OBJECT(sphb),
1451                       namebuf, UINT64_MAX);
1452    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
1453                       sphb->dtbusname);
1454
1455    /*
1456     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
1457     * we need to allocate some memory to catch those writes coming
1458     * from msi_notify()/msix_notify().
1459     * As MSIMessage:addr is going to be the same and MSIMessage:data
1460     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
1461     * be used.
1462     *
1463     * For KVM we want to ensure that this memory is a full page so that
1464     * our memory slot is of page size granularity.
1465     */
1466#ifdef CONFIG_KVM
1467    if (kvm_enabled()) {
1468        msi_window_size = getpagesize();
1469    }
1470#endif
1471
1472    memory_region_init_io(&sphb->msiwindow, NULL, &spapr_msi_ops, spapr,
1473                          "msi", msi_window_size);
1474    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
1475                                &sphb->msiwindow);
1476
1477    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
1478
1479    pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
1480
1481    QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
1482
1483    /* Initialize the LSI table */
1484    for (i = 0; i < PCI_NUM_PINS; i++) {
1485        uint32_t irq;
1486        Error *local_err = NULL;
1487
1488        irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err);
1489        if (local_err) {
1490            error_propagate(errp, local_err);
1491            error_prepend(errp, "can't allocate LSIs: ");
1492            return;
1493        }
1494
1495        sphb->lsi_table[i].irq = irq;
1496    }
1497
1498    /* allocate connectors for child PCI devices */
1499    if (sphb->dr_enabled) {
1500        for (i = 0; i < PCI_SLOT_MAX * 8; i++) {
1501            spapr_dr_connector_new(OBJECT(phb),
1502                                   SPAPR_DR_CONNECTOR_TYPE_PCI,
1503                                   (sphb->index << 16) | i);
1504        }
1505    }
1506
1507    /* DMA setup */
1508    for (i = 0; i < windows_supported; ++i) {
1509        tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]);
1510        if (!tcet) {
1511            error_setg(errp, "Creating window#%d failed for %s",
1512                       i, sphb->dtbusname);
1513            return;
1514        }
1515        memory_region_add_subregion_overlap(&sphb->iommu_root, 0,
1516                                            spapr_tce_get_iommu(tcet), 0);
1517    }
1518
1519    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
1520}
1521
1522static int spapr_phb_children_reset(Object *child, void *opaque)
1523{
1524    DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
1525
1526    if (dev) {
1527        device_reset(dev);
1528    }
1529
1530    return 0;
1531}
1532
1533void spapr_phb_dma_reset(sPAPRPHBState *sphb)
1534{
1535    int i;
1536    sPAPRTCETable *tcet;
1537
1538    for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) {
1539        tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
1540
1541        if (tcet && tcet->nb_table) {
1542            spapr_tce_table_disable(tcet);
1543        }
1544    }
1545
1546    /* Register default 32bit DMA window */
1547    tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]);
1548    spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
1549                           sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT);
1550}
1551
1552static void spapr_phb_reset(DeviceState *qdev)
1553{
1554    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
1555
1556    spapr_phb_dma_reset(sphb);
1557
1558    /* Reset the IOMMU state */
1559    object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
1560
1561    if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) {
1562        spapr_phb_vfio_reset(qdev);
1563    }
1564}
1565
/*
 * qdev properties of the sPAPR PCI host bridge.
 *
 * "index", "buid", "liobn"/"liobn64" and the window addresses default
 * to -1, the value spapr_phb_realize() tests for to detect parameters
 * that were not specified.
 */
static Property spapr_phb_properties[] = {
    DEFINE_PROP_UINT32("index", sPAPRPHBState, index, -1),
    DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
    /* LIOBNs of the two DMA windows: dma_liobn[0] and dma_liobn[1] */
    DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn[0], -1),
    DEFINE_PROP_UINT32("liobn64", sPAPRPHBState, dma_liobn[1], -1),
    /* 32-bit MMIO window */
    DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
    DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
                       SPAPR_PCI_MEM32_WIN_SIZE),
    /* 64-bit MMIO window */
    DEFINE_PROP_UINT64("mem64_win_addr", sPAPRPHBState, mem64_win_addr, -1),
    DEFINE_PROP_UINT64("mem64_win_size", sPAPRPHBState, mem64_win_size,
                       SPAPR_PCI_MEM64_WIN_SIZE),
    DEFINE_PROP_UINT64("mem64_win_pciaddr", sPAPRPHBState, mem64_win_pciaddr,
                       -1),
    /* IO window */
    DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
    DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
                       SPAPR_PCI_IO_WIN_SIZE),
    /* when false, the PHB's plug/unplug hooks refuse hotplug requests */
    DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled,
                     true),
    /* Default DMA window is 0..1GB */
    DEFINE_PROP_UINT64("dma_win_addr", sPAPRPHBState, dma_win_addr, 0),
    DEFINE_PROP_UINT64("dma_win_size", sPAPRPHBState, dma_win_size, 0x40000000),
    DEFINE_PROP_UINT64("dma64_win_addr", sPAPRPHBState, dma64_win_addr,
                       0x800000000000000ULL),
    /* selects SPAPR_PCI_DMA_MAX_WINDOWS DMA windows instead of 1 at realize */
    DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
    /* default mask has bits 12 and 16 set */
    DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
                       (1ULL << 12) | (1ULL << 16)),
    /* -1 means no NUMA node was given */
    DEFINE_PROP_UINT32("numa_node", sPAPRPHBState, numa_node, -1),
    /* enables the mig_* compatibility fields in the migration stream */
    DEFINE_PROP_BOOL("pre-2.8-migration", sPAPRPHBState,
                     pre_2_8_migration, false),
    DEFINE_PROP_END_OF_LIST(),
};
1597
/*
 * Migration description of one LSI table entry (struct spapr_pci_lsi);
 * used for the lsi_table array in vmstate_spapr_pci.
 */
static const VMStateDescription vmstate_spapr_pci_lsi = {
    .name = "spapr_pci/lsi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        /*
         * NOTE(review): declared with the _EQUAL variant - presumably the
         * incoming irq must match the local one; confirm against the
         * VMState macro definitions.
         */
        VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi),

        VMSTATE_END_OF_LIST()
    },
};
1608
/*
 * Migration description of one flattened MSI table entry
 * (spapr_pci_msi_mig): the hash table key plus the first_irq / num
 * members of its value.
 */
static const VMStateDescription vmstate_spapr_pci_msi = {
    .name = "spapr_pci/msi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(key, spapr_pci_msi_mig),
        VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
        VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
        VMSTATE_END_OF_LIST()
    },
};
1620
1621static void spapr_pci_pre_save(void *opaque)
1622{
1623    sPAPRPHBState *sphb = opaque;
1624    GHashTableIter iter;
1625    gpointer key, value;
1626    int i;
1627
1628    g_free(sphb->msi_devs);
1629    sphb->msi_devs = NULL;
1630    sphb->msi_devs_num = g_hash_table_size(sphb->msi);
1631    if (!sphb->msi_devs_num) {
1632        return;
1633    }
1634    sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig));
1635
1636    g_hash_table_iter_init(&iter, sphb->msi);
1637    for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
1638        sphb->msi_devs[i].key = *(uint32_t *) key;
1639        sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
1640    }
1641
1642    if (sphb->pre_2_8_migration) {
1643        sphb->mig_liobn = sphb->dma_liobn[0];
1644        sphb->mig_mem_win_addr = sphb->mem_win_addr;
1645        sphb->mig_mem_win_size = sphb->mem_win_size;
1646        sphb->mig_io_win_addr = sphb->io_win_addr;
1647        sphb->mig_io_win_size = sphb->io_win_size;
1648
1649        if ((sphb->mem64_win_size != 0)
1650            && (sphb->mem64_win_addr
1651                == (sphb->mem_win_addr + sphb->mem_win_size))) {
1652            sphb->mig_mem_win_size += sphb->mem64_win_size;
1653        }
1654    }
1655}
1656
1657static int spapr_pci_post_load(void *opaque, int version_id)
1658{
1659    sPAPRPHBState *sphb = opaque;
1660    gpointer key, value;
1661    int i;
1662
1663    for (i = 0; i < sphb->msi_devs_num; ++i) {
1664        key = g_memdup(&sphb->msi_devs[i].key,
1665                       sizeof(sphb->msi_devs[i].key));
1666        value = g_memdup(&sphb->msi_devs[i].value,
1667                         sizeof(sphb->msi_devs[i].value));
1668        g_hash_table_insert(sphb->msi, key, value);
1669    }
1670    g_free(sphb->msi_devs);
1671    sphb->msi_devs = NULL;
1672    sphb->msi_devs_num = 0;
1673
1674    return 0;
1675}
1676
1677static bool pre_2_8_migration(void *opaque, int version_id)
1678{
1679    sPAPRPHBState *sphb = opaque;
1680
1681    return sphb->pre_2_8_migration;
1682}
1683
/*
 * Migration description of the PHB (sPAPRPHBState).
 *
 * spapr_pci_pre_save() prepares the msi_devs array before saving and
 * spapr_pci_post_load() moves the received entries back into the MSI
 * hash table.
 */
static const VMStateDescription vmstate_spapr_pci = {
    .name = "spapr_pci",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_save = spapr_pci_pre_save,
    .post_load = spapr_pci_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
        /* mig_* fields are conditional on the pre_2_8_migration() test */
        VMSTATE_UINT32_TEST(mig_liobn, sPAPRPHBState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_mem_win_addr, sPAPRPHBState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_mem_win_size, sPAPRPHBState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_io_win_addr, sPAPRPHBState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_io_win_size, sPAPRPHBState, pre_2_8_migration),
        /* one entry per interrupt pin */
        VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
                             vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
        /* number of msi_devs entries, followed by the entries themselves */
        VMSTATE_INT32(msi_devs_num, sPAPRPHBState),
        VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0,
                                    vmstate_spapr_pci_msi, spapr_pci_msi_mig),
        VMSTATE_END_OF_LIST()
    },
};
1705
1706static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
1707                                           PCIBus *rootbus)
1708{
1709    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
1710
1711    return sphb->dtbusname;
1712}
1713
1714static void spapr_phb_class_init(ObjectClass *klass, void *data)
1715{
1716    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
1717    DeviceClass *dc = DEVICE_CLASS(klass);
1718    HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);
1719
1720    hc->root_bus_path = spapr_phb_root_bus_path;
1721    dc->realize = spapr_phb_realize;
1722    dc->props = spapr_phb_properties;
1723    dc->reset = spapr_phb_reset;
1724    dc->vmsd = &vmstate_spapr_pci;
1725    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1726    hp->plug = spapr_phb_hot_plug_child;
1727    hp->unplug = spapr_phb_hot_unplug_child;
1728}
1729
/*
 * QOM type registration record for the sPAPR PCI host bridge.
 *
 * The type derives from TYPE_PCI_HOST_BRIDGE, uses sPAPRPHBState as its
 * instance structure and implements the TYPE_HOTPLUG_HANDLER interface.
 */
static const TypeInfo spapr_phb_info = {
    .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
    .parent        = TYPE_PCI_HOST_BRIDGE,
    .instance_size = sizeof(sPAPRPHBState),
    .class_init    = spapr_phb_class_init,
    .interfaces    = (InterfaceInfo[]) {
        { TYPE_HOTPLUG_HANDLER },
        { }     /* empty entry terminates the list */
    }
};
1740
1741PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index)
1742{
1743    DeviceState *dev;
1744
1745    dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
1746    qdev_prop_set_uint32(dev, "index", index);
1747    qdev_init_nofail(dev);
1748
1749    return PCI_HOST_BRIDGE(dev);
1750}
1751
/*
 * Context handed to spapr_populate_pci_devices_dt() through the opaque
 * pointer of pci_for_each_device().
 */
typedef struct sPAPRFDT {
    void *fdt;              /* flattened device tree being populated */
    int node_off;           /* offset of the node that receives child nodes */
    sPAPRPHBState *sphb;    /* PHB the walked devices belong to */
} sPAPRFDT;
1757
1758static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
1759                                          void *opaque)
1760{
1761    PCIBus *sec_bus;
1762    sPAPRFDT *p = opaque;
1763    int offset;
1764    sPAPRFDT s_fdt;
1765
1766    offset = spapr_create_pci_child_dt(p->sphb, pdev, p->fdt, p->node_off);
1767    if (!offset) {
1768        error_report("Failed to create pci child device tree node");
1769        return;
1770    }
1771
1772    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
1773         PCI_HEADER_TYPE_BRIDGE)) {
1774        return;
1775    }
1776
1777    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
1778    if (!sec_bus) {
1779        return;
1780    }
1781
1782    s_fdt.fdt = p->fdt;
1783    s_fdt.node_off = offset;
1784    s_fdt.sphb = p->sphb;
1785    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
1786                        spapr_populate_pci_devices_dt,
1787                        &s_fdt);
1788}
1789
1790static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
1791                                           void *opaque)
1792{
1793    unsigned int *bus_no = opaque;
1794    unsigned int primary = *bus_no;
1795    unsigned int subordinate = 0xff;
1796    PCIBus *sec_bus = NULL;
1797
1798    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
1799         PCI_HEADER_TYPE_BRIDGE)) {
1800        return;
1801    }
1802
1803    (*bus_no)++;
1804    pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1);
1805    pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1);
1806    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
1807
1808    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
1809    if (!sec_bus) {
1810        return;
1811    }
1812
1813    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1);
1814    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
1815                        spapr_phb_pci_enumerate_bridge, bus_no);
1816    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
1817}
1818
1819static void spapr_phb_pci_enumerate(sPAPRPHBState *phb)
1820{
1821    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
1822    unsigned int bus_no = 0;
1823
1824    pci_for_each_device(bus, pci_bus_num(bus),
1825                        spapr_phb_pci_enumerate_bridge,
1826                        &bus_no);
1827
1828}
1829
/*
 * Add the device tree node for one PHB, including its properties, its
 * interrupt map, its default DMA window, the PCI devices below it and its
 * PCI DR connectors.
 *
 * @phb:          host bridge to describe
 * @xics_phandle: phandle stored in every interrupt-map entry
 * @fdt:          flattened device tree to add the "pci@<buid>" node to
 *
 * As a side effect the bridges below the PHB get their bus numbers
 * programmed (spapr_phb_pci_enumerate()).
 *
 * Returns 0 on success.  Returns the negative value from fdt_add_subnode()
 * if the node cannot be created, -1 if no TCE table is found for
 * phb->dma_liobn[0], or the non-zero result of spapr_drc_populate_dt().
 * Calls wrapped in _FDT() follow that macro's own failure handling, which
 * is defined outside this function.
 */
int spapr_populate_pci_dt(sPAPRPHBState *phb,
                          uint32_t xics_phandle,
                          void *fdt)
{
    int bus_off, i, j, ret;
    char nodename[FDT_NAME_MAX];
    uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
    /* One "ranges" entry: packed so it can be written to the FDT as-is */
    struct {
        uint32_t hi;
        uint64_t child;
        uint64_t parent;
        uint64_t size;
    } QEMU_PACKED ranges[] = {
        {
            /* I/O window */
            cpu_to_be32(b_ss(1)), cpu_to_be64(0),
            cpu_to_be64(phb->io_win_addr),
            cpu_to_be64(memory_region_size(&phb->iospace)),
        },
        {
            /* Memory window */
            cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
            cpu_to_be64(phb->mem_win_addr),
            cpu_to_be64(phb->mem_win_size),
        },
        {
            /* 64-bit memory window, only emitted when its size is non-zero */
            cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr),
            cpu_to_be64(phb->mem64_win_addr),
            cpu_to_be64(phb->mem64_win_size),
        },
    };
    /* Drop the third entry when there is no 64-bit window */
    const unsigned sizeof_ranges =
        (phb->mem64_win_size ? 3 : 2) * sizeof(ranges[0]);
    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
    uint32_t interrupt_map_mask[] = {
        cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
    /* One 7-cell entry per (slot, pin) combination */
    uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
    uint32_t ddw_applicable[] = {
        cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW),
        cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW),
        cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW)
    };
    uint32_t ddw_extensions[] = {
        cpu_to_be32(1),
        cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
    };
    uint32_t associativity[] = {cpu_to_be32(0x4),
                                cpu_to_be32(0x0),
                                cpu_to_be32(0x0),
                                cpu_to_be32(0x0),
                                cpu_to_be32(phb->numa_node)};
    sPAPRTCETable *tcet;
    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
    sPAPRFDT s_fdt;

    /* Start populating the FDT */
    snprintf(nodename, FDT_NAME_MAX, "pci@%" PRIx64, phb->buid);
    bus_off = fdt_add_subnode(fdt, 0, nodename);
    if (bus_off < 0) {
        return bus_off;
    }

    /* Write PHB properties */
    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR));

    /* Dynamic DMA window */
    if (phb->ddw_enabled) {
        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable,
                         sizeof(ddw_applicable)));
        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
                         &ddw_extensions, sizeof(ddw_extensions)));
    }

    /* Advertise NUMA via ibm,associativity */
    if (phb->numa_node != -1) {
        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
                         sizeof(associativity)));
    }

    /* Build the interrupt-map; this must match what is done
     * in pci_spapr_map_irq
     */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
    for (i = 0; i < PCI_SLOT_MAX; i++) {
        for (j = 0; j < PCI_NUM_PINS; j++) {
            uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
            int lsi_num = pci_spapr_swizzle(i, j);

            /* Cells 0-3: slot i, pin j+1 */
            irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
            irqmap[1] = 0;
            irqmap[2] = 0;
            irqmap[3] = cpu_to_be32(j+1);
            /* Cells 4-6: interrupt controller phandle and the LSI's irq */
            irqmap[4] = cpu_to_be32(xics_phandle);
            irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq);
            irqmap[6] = cpu_to_be32(0x8);
        }
    }
    /* Write interrupt map */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                     sizeof(interrupt_map)));

    /* Describe the default DMA window from the first LIOBN's TCE table */
    tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
    if (!tcet) {
        return -1;
    }
    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
                 tcet->liobn, tcet->bus_offset,
                 tcet->nb_table << tcet->page_shift);

    /* Walk the bridges and program the bus numbers */
    spapr_phb_pci_enumerate(phb);
    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));

    /* Populate tree nodes with PCI devices attached */
    s_fdt.fdt = fdt;
    s_fdt.node_off = bus_off;
    s_fdt.sphb = phb;
    pci_for_each_device(bus, pci_bus_num(bus),
                        spapr_populate_pci_devices_dt,
                        &s_fdt);

    /* Add the PCI dynamic-reconfiguration connectors owned by this PHB */
    ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
                                SPAPR_DR_CONNECTOR_TYPE_PCI);
    if (ret) {
        return ret;
    }

    return 0;
}
1968
1969void spapr_pci_rtas_init(void)
1970{
1971    spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
1972                        rtas_read_pci_config);
1973    spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
1974                        rtas_write_pci_config);
1975    spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
1976                        rtas_ibm_read_pci_config);
1977    spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
1978                        rtas_ibm_write_pci_config);
1979    if (msi_nonbroken) {
1980        spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
1981                            "ibm,query-interrupt-source-number",
1982                            rtas_ibm_query_interrupt_source_number);
1983        spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
1984                            rtas_ibm_change_msi);
1985    }
1986
1987    spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION,
1988                        "ibm,set-eeh-option",
1989                        rtas_ibm_set_eeh_option);
1990    spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2,
1991                        "ibm,get-config-addr-info2",
1992                        rtas_ibm_get_config_addr_info2);
1993    spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2,
1994                        "ibm,read-slot-reset-state2",
1995                        rtas_ibm_read_slot_reset_state2);
1996    spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET,
1997                        "ibm,set-slot-reset",
1998                        rtas_ibm_set_slot_reset);
1999    spapr_rtas_register(RTAS_IBM_CONFIGURE_PE,
2000                        "ibm,configure-pe",
2001                        rtas_ibm_configure_pe);
2002    spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL,
2003                        "ibm,slot-error-detail",
2004                        rtas_ibm_slot_error_detail);
2005}
2006
/* Register the sPAPR PCI host bridge type described by spapr_phb_info. */
static void spapr_pci_register_types(void)
{
    type_register_static(&spapr_phb_info);
}

/* Hook the registration function into QEMU's type initialization. */
type_init(spapr_pci_register_types)
2013
2014static int spapr_switch_one_vga(DeviceState *dev, void *opaque)
2015{
2016    bool be = *(bool *)opaque;
2017
2018    if (object_dynamic_cast(OBJECT(dev), "VGA")
2019        || object_dynamic_cast(OBJECT(dev), "secondary-vga")) {
2020        object_property_set_bool(OBJECT(dev), be, "big-endian-framebuffer",
2021                                 &error_abort);
2022    }
2023    return 0;
2024}
2025
2026void spapr_pci_switch_vga(bool big_endian)
2027{
2028    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
2029    sPAPRPHBState *sphb;
2030
2031    /*
2032     * For backward compatibility with existing guests, we switch
2033     * the endianness of the VGA controller when changing the guest
2034     * interrupt mode
2035     */
2036    QLIST_FOREACH(sphb, &spapr->phbs, list) {
2037        BusState *bus = &PCI_HOST_BRIDGE(sphb)->bus->qbus;
2038        qbus_walk_children(bus, spapr_switch_one_vga, NULL, NULL, NULL,
2039                           &big_endian);
2040    }
2041}
2042