qemu/hw/ppc/spapr_pci.c
<<
>>
Prefs
   1/*
   2 * QEMU sPAPR PCI host originated from Uninorth PCI host
   3 *
   4 * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
   5 * Copyright (C) 2011 David Gibson, IBM Corporation.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25#include "qemu/osdep.h"
  26#include "qapi/error.h"
  27#include "qemu-common.h"
  28#include "cpu.h"
  29#include "hw/hw.h"
  30#include "hw/sysbus.h"
  31#include "hw/pci/pci.h"
  32#include "hw/pci/msi.h"
  33#include "hw/pci/msix.h"
  34#include "hw/pci/pci_host.h"
  35#include "hw/ppc/spapr.h"
  36#include "hw/pci-host/spapr.h"
  37#include "exec/address-spaces.h"
  38#include "exec/ram_addr.h"
  39#include <libfdt.h>
  40#include "trace.h"
  41#include "qemu/error-report.h"
  42#include "qapi/qmp/qerror.h"
  43#include "hw/ppc/fdt.h"
  44#include "hw/pci/pci_bridge.h"
  45#include "hw/pci/pci_bus.h"
  46#include "hw/pci/pci_ids.h"
  47#include "hw/ppc/spapr_drc.h"
  48#include "sysemu/device_tree.h"
  49#include "sysemu/kvm.h"
  50#include "sysemu/hostmem.h"
  51#include "sysemu/numa.h"
  52
/*
 * Function codes accepted in the "func" argument of ibm,change-msi.
 * Copied from the kernel arch/powerpc/platforms/pseries/msi.c
 *
 * rtas_ibm_change_msi() in this file only handles RTAS_CHANGE_FN,
 * RTAS_CHANGE_MSI_FN and RTAS_CHANGE_MSIX_FN; the other codes are
 * rejected as not implemented.
 */
#define RTAS_QUERY_FN           0
#define RTAS_CHANGE_FN          1
#define RTAS_RESET_FN           2
#define RTAS_CHANGE_MSI_FN      3
#define RTAS_CHANGE_MSIX_FN     4

/* Interrupt types to return on RTAS_CHANGE_* */
#define RTAS_TYPE_MSI           1
#define RTAS_TYPE_MSIX          2
  63
  64SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid)
  65{
  66    SpaprPhbState *sphb;
  67
  68    QLIST_FOREACH(sphb, &spapr->phbs, list) {
  69        if (sphb->buid != buid) {
  70            continue;
  71        }
  72        return sphb;
  73    }
  74
  75    return NULL;
  76}
  77
  78PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid,
  79                              uint32_t config_addr)
  80{
  81    SpaprPhbState *sphb = spapr_pci_find_phb(spapr, buid);
  82    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
  83    int bus_num = (config_addr >> 16) & 0xFF;
  84    int devfn = (config_addr >> 8) & 0xFF;
  85
  86    if (!phb) {
  87        return NULL;
  88    }
  89
  90    return pci_find_device(phb->bus, bus_num, devfn);
  91}
  92
  93static uint32_t rtas_pci_cfgaddr(uint32_t arg)
  94{
  95    /* This handles the encoding of extended config space addresses */
  96    return ((arg >> 20) & 0xf00) | (arg & 0xff);
  97}
  98
  99static void finish_read_pci_config(SpaprMachineState *spapr, uint64_t buid,
 100                                   uint32_t addr, uint32_t size,
 101                                   target_ulong rets)
 102{
 103    PCIDevice *pci_dev;
 104    uint32_t val;
 105
 106    if ((size != 1) && (size != 2) && (size != 4)) {
 107        /* access must be 1, 2 or 4 bytes */
 108        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 109        return;
 110    }
 111
 112    pci_dev = spapr_pci_find_dev(spapr, buid, addr);
 113    addr = rtas_pci_cfgaddr(addr);
 114
 115    if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
 116        /* Access must be to a valid device, within bounds and
 117         * naturally aligned */
 118        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 119        return;
 120    }
 121
 122    val = pci_host_config_read_common(pci_dev, addr,
 123                                      pci_config_size(pci_dev), size);
 124
 125    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 126    rtas_st(rets, 1, val);
 127}
 128
 129static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
 130                                     uint32_t token, uint32_t nargs,
 131                                     target_ulong args,
 132                                     uint32_t nret, target_ulong rets)
 133{
 134    uint64_t buid;
 135    uint32_t size, addr;
 136
 137    if ((nargs != 4) || (nret != 2)) {
 138        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 139        return;
 140    }
 141
 142    buid = rtas_ldq(args, 1);
 143    size = rtas_ld(args, 3);
 144    addr = rtas_ld(args, 0);
 145
 146    finish_read_pci_config(spapr, buid, addr, size, rets);
 147}
 148
 149static void rtas_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
 150                                 uint32_t token, uint32_t nargs,
 151                                 target_ulong args,
 152                                 uint32_t nret, target_ulong rets)
 153{
 154    uint32_t size, addr;
 155
 156    if ((nargs != 2) || (nret != 2)) {
 157        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 158        return;
 159    }
 160
 161    size = rtas_ld(args, 1);
 162    addr = rtas_ld(args, 0);
 163
 164    finish_read_pci_config(spapr, 0, addr, size, rets);
 165}
 166
 167static void finish_write_pci_config(SpaprMachineState *spapr, uint64_t buid,
 168                                    uint32_t addr, uint32_t size,
 169                                    uint32_t val, target_ulong rets)
 170{
 171    PCIDevice *pci_dev;
 172
 173    if ((size != 1) && (size != 2) && (size != 4)) {
 174        /* access must be 1, 2 or 4 bytes */
 175        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 176        return;
 177    }
 178
 179    pci_dev = spapr_pci_find_dev(spapr, buid, addr);
 180    addr = rtas_pci_cfgaddr(addr);
 181
 182    if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
 183        /* Access must be to a valid device, within bounds and
 184         * naturally aligned */
 185        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 186        return;
 187    }
 188
 189    pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
 190                                 val, size);
 191
 192    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 193}
 194
 195static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
 196                                      uint32_t token, uint32_t nargs,
 197                                      target_ulong args,
 198                                      uint32_t nret, target_ulong rets)
 199{
 200    uint64_t buid;
 201    uint32_t val, size, addr;
 202
 203    if ((nargs != 5) || (nret != 1)) {
 204        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 205        return;
 206    }
 207
 208    buid = rtas_ldq(args, 1);
 209    val = rtas_ld(args, 4);
 210    size = rtas_ld(args, 3);
 211    addr = rtas_ld(args, 0);
 212
 213    finish_write_pci_config(spapr, buid, addr, size, val, rets);
 214}
 215
 216static void rtas_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
 217                                  uint32_t token, uint32_t nargs,
 218                                  target_ulong args,
 219                                  uint32_t nret, target_ulong rets)
 220{
 221    uint32_t val, size, addr;
 222
 223    if ((nargs != 3) || (nret != 1)) {
 224        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 225        return;
 226    }
 227
 228
 229    val = rtas_ld(args, 2);
 230    size = rtas_ld(args, 1);
 231    addr = rtas_ld(args, 0);
 232
 233    finish_write_pci_config(spapr, 0, addr, size, val, rets);
 234}
 235
 236/*
 237 * Set MSI/MSIX message data.
 238 * This is required for msi_notify()/msix_notify() which
 239 * will write at the addresses via spapr_msi_write().
 240 *
 241 * If hwaddr == 0, all entries will have .data == first_irq i.e.
 242 * table will be reset.
 243 */
 244static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
 245                             unsigned first_irq, unsigned req_num)
 246{
 247    unsigned i;
 248    MSIMessage msg = { .address = addr, .data = first_irq };
 249
 250    if (!msix) {
 251        msi_set_message(pdev, msg);
 252        trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
 253        return;
 254    }
 255
 256    for (i = 0; i < req_num; ++i) {
 257        msix_set_message(pdev, i, msg);
 258        trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
 259        if (addr) {
 260            ++msg.data;
 261        }
 262    }
 263}
 264
 265static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
 266                                uint32_t token, uint32_t nargs,
 267                                target_ulong args, uint32_t nret,
 268                                target_ulong rets)
 269{
 270    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 271    uint32_t config_addr = rtas_ld(args, 0);
 272    uint64_t buid = rtas_ldq(args, 1);
 273    unsigned int func = rtas_ld(args, 3);
 274    unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
 275    unsigned int seq_num = rtas_ld(args, 5);
 276    unsigned int ret_intr_type;
 277    unsigned int irq, max_irqs = 0;
 278    SpaprPhbState *phb = NULL;
 279    PCIDevice *pdev = NULL;
 280    spapr_pci_msi *msi;
 281    int *config_addr_key;
 282    Error *err = NULL;
 283    int i;
 284
 285    /* Fins SpaprPhbState */
 286    phb = spapr_pci_find_phb(spapr, buid);
 287    if (phb) {
 288        pdev = spapr_pci_find_dev(spapr, buid, config_addr);
 289    }
 290    if (!phb || !pdev) {
 291        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 292        return;
 293    }
 294
 295    switch (func) {
 296    case RTAS_CHANGE_FN:
 297        if (msi_present(pdev)) {
 298            ret_intr_type = RTAS_TYPE_MSI;
 299        } else if (msix_present(pdev)) {
 300            ret_intr_type = RTAS_TYPE_MSIX;
 301        } else {
 302            rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 303            return;
 304        }
 305        break;
 306    case RTAS_CHANGE_MSI_FN:
 307        if (msi_present(pdev)) {
 308            ret_intr_type = RTAS_TYPE_MSI;
 309        } else {
 310            rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 311            return;
 312        }
 313        break;
 314    case RTAS_CHANGE_MSIX_FN:
 315        if (msix_present(pdev)) {
 316            ret_intr_type = RTAS_TYPE_MSIX;
 317        } else {
 318            rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 319            return;
 320        }
 321        break;
 322    default:
 323        error_report("rtas_ibm_change_msi(%u) is not implemented", func);
 324        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 325        return;
 326    }
 327
 328    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
 329
 330    /* Releasing MSIs */
 331    if (!req_num) {
 332        if (!msi) {
 333            trace_spapr_pci_msi("Releasing wrong config", config_addr);
 334            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 335            return;
 336        }
 337
 338        if (!smc->legacy_irq_allocation) {
 339            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
 340        }
 341        spapr_irq_free(spapr, msi->first_irq, msi->num);
 342        if (msi_present(pdev)) {
 343            spapr_msi_setmsg(pdev, 0, false, 0, 0);
 344        }
 345        if (msix_present(pdev)) {
 346            spapr_msi_setmsg(pdev, 0, true, 0, 0);
 347        }
 348        g_hash_table_remove(phb->msi, &config_addr);
 349
 350        trace_spapr_pci_msi("Released MSIs", config_addr);
 351        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 352        rtas_st(rets, 1, 0);
 353        return;
 354    }
 355
 356    /* Enabling MSI */
 357
 358    /* Check if the device supports as many IRQs as requested */
 359    if (ret_intr_type == RTAS_TYPE_MSI) {
 360        max_irqs = msi_nr_vectors_allocated(pdev);
 361    } else if (ret_intr_type == RTAS_TYPE_MSIX) {
 362        max_irqs = pdev->msix_entries_nr;
 363    }
 364    if (!max_irqs) {
 365        error_report("Requested interrupt type %d is not enabled for device %x",
 366                     ret_intr_type, config_addr);
 367        rtas_st(rets, 0, -1); /* Hardware error */
 368        return;
 369    }
 370    /* Correct the number if the guest asked for too many */
 371    if (req_num > max_irqs) {
 372        trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
 373        req_num = max_irqs;
 374        irq = 0; /* to avoid misleading trace */
 375        goto out;
 376    }
 377
 378    /* Allocate MSIs */
 379    if (smc->legacy_irq_allocation) {
 380        irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
 381                             &err);
 382    } else {
 383        irq = spapr_irq_msi_alloc(spapr, req_num,
 384                                  ret_intr_type == RTAS_TYPE_MSI, &err);
 385    }
 386    if (err) {
 387        error_reportf_err(err, "Can't allocate MSIs for device %x: ",
 388                          config_addr);
 389        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 390        return;
 391    }
 392
 393    for (i = 0; i < req_num; i++) {
 394        spapr_irq_claim(spapr, irq + i, false, &err);
 395        if (err) {
 396            if (i) {
 397                spapr_irq_free(spapr, irq, i);
 398            }
 399            if (!smc->legacy_irq_allocation) {
 400                spapr_irq_msi_free(spapr, irq, req_num);
 401            }
 402            error_reportf_err(err, "Can't allocate MSIs for device %x: ",
 403                              config_addr);
 404            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 405            return;
 406        }
 407    }
 408
 409    /* Release previous MSIs */
 410    if (msi) {
 411        if (!smc->legacy_irq_allocation) {
 412            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
 413        }
 414        spapr_irq_free(spapr, msi->first_irq, msi->num);
 415        g_hash_table_remove(phb->msi, &config_addr);
 416    }
 417
 418    /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
 419    spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
 420                     irq, req_num);
 421
 422    /* Add MSI device to cache */
 423    msi = g_new(spapr_pci_msi, 1);
 424    msi->first_irq = irq;
 425    msi->num = req_num;
 426    config_addr_key = g_new(int, 1);
 427    *config_addr_key = config_addr;
 428    g_hash_table_insert(phb->msi, config_addr_key, msi);
 429
 430out:
 431    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 432    rtas_st(rets, 1, req_num);
 433    rtas_st(rets, 2, ++seq_num);
 434    if (nret > 3) {
 435        rtas_st(rets, 3, ret_intr_type);
 436    }
 437
 438    trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
 439}
 440
 441static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
 442                                                   SpaprMachineState *spapr,
 443                                                   uint32_t token,
 444                                                   uint32_t nargs,
 445                                                   target_ulong args,
 446                                                   uint32_t nret,
 447                                                   target_ulong rets)
 448{
 449    uint32_t config_addr = rtas_ld(args, 0);
 450    uint64_t buid = rtas_ldq(args, 1);
 451    unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
 452    SpaprPhbState *phb = NULL;
 453    PCIDevice *pdev = NULL;
 454    spapr_pci_msi *msi;
 455
 456    /* Find SpaprPhbState */
 457    phb = spapr_pci_find_phb(spapr, buid);
 458    if (phb) {
 459        pdev = spapr_pci_find_dev(spapr, buid, config_addr);
 460    }
 461    if (!phb || !pdev) {
 462        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 463        return;
 464    }
 465
 466    /* Find device descriptor and start IRQ */
 467    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
 468    if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
 469        trace_spapr_pci_msi("Failed to return vector", config_addr);
 470        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 471        return;
 472    }
 473    intr_src_num = msi->first_irq + ioa_intr_num;
 474    trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
 475                                                           intr_src_num);
 476
 477    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 478    rtas_st(rets, 1, intr_src_num);
 479    rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
 480}
 481
 482static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
 483                                    SpaprMachineState *spapr,
 484                                    uint32_t token, uint32_t nargs,
 485                                    target_ulong args, uint32_t nret,
 486                                    target_ulong rets)
 487{
 488    SpaprPhbState *sphb;
 489    uint32_t addr, option;
 490    uint64_t buid;
 491    int ret;
 492
 493    if ((nargs != 4) || (nret != 1)) {
 494        goto param_error_exit;
 495    }
 496
 497    buid = rtas_ldq(args, 1);
 498    addr = rtas_ld(args, 0);
 499    option = rtas_ld(args, 3);
 500
 501    sphb = spapr_pci_find_phb(spapr, buid);
 502    if (!sphb) {
 503        goto param_error_exit;
 504    }
 505
 506    if (!spapr_phb_eeh_available(sphb)) {
 507        goto param_error_exit;
 508    }
 509
 510    ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option);
 511    rtas_st(rets, 0, ret);
 512    return;
 513
 514param_error_exit:
 515    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 516}
 517
 518static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
 519                                           SpaprMachineState *spapr,
 520                                           uint32_t token, uint32_t nargs,
 521                                           target_ulong args, uint32_t nret,
 522                                           target_ulong rets)
 523{
 524    SpaprPhbState *sphb;
 525    PCIDevice *pdev;
 526    uint32_t addr, option;
 527    uint64_t buid;
 528
 529    if ((nargs != 4) || (nret != 2)) {
 530        goto param_error_exit;
 531    }
 532
 533    buid = rtas_ldq(args, 1);
 534    sphb = spapr_pci_find_phb(spapr, buid);
 535    if (!sphb) {
 536        goto param_error_exit;
 537    }
 538
 539    if (!spapr_phb_eeh_available(sphb)) {
 540        goto param_error_exit;
 541    }
 542
 543    /*
 544     * We always have PE address of form "00BB0001". "BB"
 545     * represents the bus number of PE's primary bus.
 546     */
 547    option = rtas_ld(args, 3);
 548    switch (option) {
 549    case RTAS_GET_PE_ADDR:
 550        addr = rtas_ld(args, 0);
 551        pdev = spapr_pci_find_dev(spapr, buid, addr);
 552        if (!pdev) {
 553            goto param_error_exit;
 554        }
 555
 556        rtas_st(rets, 1, (pci_bus_num(pci_get_bus(pdev)) << 16) + 1);
 557        break;
 558    case RTAS_GET_PE_MODE:
 559        rtas_st(rets, 1, RTAS_PE_MODE_SHARED);
 560        break;
 561    default:
 562        goto param_error_exit;
 563    }
 564
 565    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 566    return;
 567
 568param_error_exit:
 569    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 570}
 571
 572static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
 573                                            SpaprMachineState *spapr,
 574                                            uint32_t token, uint32_t nargs,
 575                                            target_ulong args, uint32_t nret,
 576                                            target_ulong rets)
 577{
 578    SpaprPhbState *sphb;
 579    uint64_t buid;
 580    int state, ret;
 581
 582    if ((nargs != 3) || (nret != 4 && nret != 5)) {
 583        goto param_error_exit;
 584    }
 585
 586    buid = rtas_ldq(args, 1);
 587    sphb = spapr_pci_find_phb(spapr, buid);
 588    if (!sphb) {
 589        goto param_error_exit;
 590    }
 591
 592    if (!spapr_phb_eeh_available(sphb)) {
 593        goto param_error_exit;
 594    }
 595
 596    ret = spapr_phb_vfio_eeh_get_state(sphb, &state);
 597    rtas_st(rets, 0, ret);
 598    if (ret != RTAS_OUT_SUCCESS) {
 599        return;
 600    }
 601
 602    rtas_st(rets, 1, state);
 603    rtas_st(rets, 2, RTAS_EEH_SUPPORT);
 604    rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO);
 605    if (nret >= 5) {
 606        rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO);
 607    }
 608    return;
 609
 610param_error_exit:
 611    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 612}
 613
 614static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
 615                                    SpaprMachineState *spapr,
 616                                    uint32_t token, uint32_t nargs,
 617                                    target_ulong args, uint32_t nret,
 618                                    target_ulong rets)
 619{
 620    SpaprPhbState *sphb;
 621    uint32_t option;
 622    uint64_t buid;
 623    int ret;
 624
 625    if ((nargs != 4) || (nret != 1)) {
 626        goto param_error_exit;
 627    }
 628
 629    buid = rtas_ldq(args, 1);
 630    option = rtas_ld(args, 3);
 631    sphb = spapr_pci_find_phb(spapr, buid);
 632    if (!sphb) {
 633        goto param_error_exit;
 634    }
 635
 636    if (!spapr_phb_eeh_available(sphb)) {
 637        goto param_error_exit;
 638    }
 639
 640    ret = spapr_phb_vfio_eeh_reset(sphb, option);
 641    rtas_st(rets, 0, ret);
 642    return;
 643
 644param_error_exit:
 645    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 646}
 647
 648static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
 649                                  SpaprMachineState *spapr,
 650                                  uint32_t token, uint32_t nargs,
 651                                  target_ulong args, uint32_t nret,
 652                                  target_ulong rets)
 653{
 654    SpaprPhbState *sphb;
 655    uint64_t buid;
 656    int ret;
 657
 658    if ((nargs != 3) || (nret != 1)) {
 659        goto param_error_exit;
 660    }
 661
 662    buid = rtas_ldq(args, 1);
 663    sphb = spapr_pci_find_phb(spapr, buid);
 664    if (!sphb) {
 665        goto param_error_exit;
 666    }
 667
 668    if (!spapr_phb_eeh_available(sphb)) {
 669        goto param_error_exit;
 670    }
 671
 672    ret = spapr_phb_vfio_eeh_configure(sphb);
 673    rtas_st(rets, 0, ret);
 674    return;
 675
 676param_error_exit:
 677    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 678}
 679
 680/* To support it later */
 681static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
 682                                       SpaprMachineState *spapr,
 683                                       uint32_t token, uint32_t nargs,
 684                                       target_ulong args, uint32_t nret,
 685                                       target_ulong rets)
 686{
 687    SpaprPhbState *sphb;
 688    int option;
 689    uint64_t buid;
 690
 691    if ((nargs != 8) || (nret != 1)) {
 692        goto param_error_exit;
 693    }
 694
 695    buid = rtas_ldq(args, 1);
 696    sphb = spapr_pci_find_phb(spapr, buid);
 697    if (!sphb) {
 698        goto param_error_exit;
 699    }
 700
 701    if (!spapr_phb_eeh_available(sphb)) {
 702        goto param_error_exit;
 703    }
 704
 705    option = rtas_ld(args, 7);
 706    switch (option) {
 707    case RTAS_SLOT_TEMP_ERR_LOG:
 708    case RTAS_SLOT_PERM_ERR_LOG:
 709        break;
 710    default:
 711        goto param_error_exit;
 712    }
 713
 714    /* We don't have error log yet */
 715    rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
 716    return;
 717
 718param_error_exit:
 719    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 720}
 721
 722static int pci_spapr_swizzle(int slot, int pin)
 723{
 724    return (slot + pin) % PCI_NUM_PINS;
 725}
 726
 727static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
 728{
 729    /*
 730     * Here we need to convert pci_dev + irq_num to some unique value
 731     * which is less than number of IRQs on the specific bus (4).  We
 732     * use standard PCI swizzling, that is (slot number + pin number)
 733     * % 4.
 734     */
 735    return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
 736}
 737
 738static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
 739{
 740    /*
 741     * Here we use the number returned by pci_spapr_map_irq to find a
 742     * corresponding qemu_irq.
 743     */
 744    SpaprPhbState *phb = opaque;
 745
 746    trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
 747    qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
 748}
 749
 750static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
 751{
 752    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
 753    PCIINTxRoute route;
 754
 755    route.mode = PCI_INTX_ENABLED;
 756    route.irq = sphb->lsi_table[pin].irq;
 757
 758    return route;
 759}
 760
 761/*
 762 * MSI/MSIX memory region implementation.
 763 * The handler handles both MSI and MSIX.
 764 * The vector number is encoded in least bits in data.
 765 */
 766static void spapr_msi_write(void *opaque, hwaddr addr,
 767                            uint64_t data, unsigned size)
 768{
 769    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 770    uint32_t irq = data;
 771
 772    trace_spapr_pci_msi_write(addr, data, irq);
 773
 774    qemu_irq_pulse(spapr_qirq(spapr, irq));
 775}
 776
 777static const MemoryRegionOps spapr_msi_ops = {
 778    /* There is no .read as the read result is undefined by PCI spec */
 779    .read = NULL,
 780    .write = spapr_msi_write,
 781    .endianness = DEVICE_LITTLE_ENDIAN
 782};
 783
 784/*
 785 * PHB PCI device
 786 */
 787static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 788{
 789    SpaprPhbState *phb = opaque;
 790
 791    return &phb->iommu_as;
 792}
 793
 794static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb,  PCIDevice *pdev)
 795{
 796    char *path = NULL, *buf = NULL, *host = NULL;
 797
 798    /* Get the PCI VFIO host id */
 799    host = object_property_get_str(OBJECT(pdev), "host", NULL);
 800    if (!host) {
 801        goto err_out;
 802    }
 803
 804    /* Construct the path of the file that will give us the DT location */
 805    path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host);
 806    g_free(host);
 807    if (!g_file_get_contents(path, &buf, NULL, NULL)) {
 808        goto err_out;
 809    }
 810    g_free(path);
 811
 812    /* Construct and read from host device tree the loc-code */
 813    path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf);
 814    g_free(buf);
 815    if (!g_file_get_contents(path, &buf, NULL, NULL)) {
 816        goto err_out;
 817    }
 818    return buf;
 819
 820err_out:
 821    g_free(path);
 822    return NULL;
 823}
 824
 825static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev)
 826{
 827    char *buf;
 828    const char *devtype = "qemu";
 829    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
 830
 831    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
 832        buf = spapr_phb_vfio_get_loc_code(sphb, pdev);
 833        if (buf) {
 834            return buf;
 835        }
 836        devtype = "vfio";
 837    }
 838    /*
 839     * For emulated devices and VFIO-failure case, make up
 840     * the loc-code.
 841     */
 842    buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x",
 843                          devtype, pdev->name, sphb->index, busnr,
 844                          PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 845    return buf;
 846}
 847
 848/* Macros to operate with address in OF binding to PCI */
 849#define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
 850#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
 851#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
 852#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
 853#define b_ss(x)         b_x((x), 24, 2) /* the space code */
 854#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
 855#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
 856#define b_fff(x)        b_x((x), 8, 3)  /* function number */
 857#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
 858
 859/* for 'reg'/'assigned-addresses' OF properties */
 860#define RESOURCE_CELLS_SIZE 2
 861#define RESOURCE_CELLS_ADDRESS 3
 862
/*
 * One ('phys-addr', 'size') entry of a 'reg' or 'assigned-addresses'
 * property: three address cells followed by two size cells.
 * populate_resource_props() stores every cell with cpu_to_be32().
 */
typedef struct ResourceFields {
    uint32_t phys_hi;   /* phys.hi: flags, space code, bus/dev/fn, register */
    uint32_t phys_mid;  /* phys.mid: upper 32 bits of the address */
    uint32_t phys_lo;   /* phys.lo: lower 32 bits of the address */
    uint32_t size_hi;   /* size.hi: upper 32 bits of the region size */
    uint32_t size_lo;   /* size.lo: lower 32 bits of the region size */
} QEMU_PACKED ResourceFields;
 870
/*
 * Scratch storage for the 'reg' and 'assigned-addresses' properties of
 * one PCI device, filled in by populate_resource_props().
 */
typedef struct ResourceProps {
    /* config space entry plus one entry per populated I/O region */
    ResourceFields reg[8];
    /* one entry per I/O region that currently has an address assigned */
    ResourceFields assigned[7];
    /* NOTE(review): presumably the used length of reg[] - confirm unit */
    uint32_t reg_len;
    /* NOTE(review): presumably the used length of assigned[] - confirm unit */
    uint32_t assigned_len;
} ResourceProps;
 877
/* fill in the 'reg'/'assigned-addresses' OF properties for
 879 * a PCI device. 'reg' describes resource requirements for a
 880 * device's IO/MEM regions, 'assigned-addresses' describes the
 881 * actual resource assignments.
 882 *
 883 * the properties are arrays of ('phys-addr', 'size') pairs describing
 884 * the addressable regions of the PCI device, where 'phys-addr' is a
 885 * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to
 886 * (phys.hi, phys.mid, phys.lo), and 'size' is a
 887 * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo).
 888 *
 889 * phys.hi = 0xYYXXXXZZ, where:
 890 *   0xYY = npt000ss
 891 *          |||   |
 892 *          |||   +-- space code
 893 *          |||               |
 894 *          |||               +  00 if configuration space
 895 *          |||               +  01 if IO region,
 896 *          |||               +  10 if 32-bit MEM region
 897 *          |||               +  11 if 64-bit MEM region
 898 *          |||
 899 *          ||+------ for non-relocatable IO: 1 if aliased
 900 *          ||        for relocatable IO: 1 if below 64KB
 901 *          ||        for MEM: 1 if below 1MB
 902 *          |+------- 1 if region is prefetchable
 903 *          +-------- 1 if region is non-relocatable
 904 *   0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function
 905 *            bits respectively
 906 *   0xZZ = rrrrrrrr, the register number of the BAR corresponding
 907 *          to the region
 908 *
 909 * phys.mid and phys.lo correspond respectively to the hi/lo portions
 910 * of the actual address of the region.
 911 *
 912 * how the phys-addr/size values are used differ slightly between
 913 * 'reg' and 'assigned-addresses' properties. namely, 'reg' has
 914 * an additional description for the config space region of the
 915 * device, and in the case of QEMU has n=0 and phys.mid=phys.lo=0
 916 * to describe the region as relocatable, with an address-mapping
 917 * that corresponds directly to the PHB's address space for the
 918 * resource. 'assigned-addresses' always has n=1 set with an absolute
 919 * address assigned for the resource. in general, 'assigned-addresses'
 920 * won't be populated, since addresses for PCI devices are generally
 921 * unmapped initially and left to the guest to assign.
 922 *
 923 * note also that addresses defined in these properties are, at least
 924 * for PAPR guests, relative to the PHBs IO/MEM windows, and
 925 * correspond directly to the addresses in the BARs.
 926 *
 927 * in accordance with PCI Bus Binding to Open Firmware,
 928 * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7,
 929 * Appendix C.
 930 */
 931static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
 932{
 933    int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d))));
 934    uint32_t dev_id = (b_bbbbbbbb(bus_num) |
 935                       b_ddddd(PCI_SLOT(d->devfn)) |
 936                       b_fff(PCI_FUNC(d->devfn)));
 937    ResourceFields *reg, *assigned;
 938    int i, reg_idx = 0, assigned_idx = 0;
 939
 940    /* config space region */
 941    reg = &rp->reg[reg_idx++];
 942    reg->phys_hi = cpu_to_be32(dev_id);
 943    reg->phys_mid = 0;
 944    reg->phys_lo = 0;
 945    reg->size_hi = 0;
 946    reg->size_lo = 0;
 947
 948    for (i = 0; i < PCI_NUM_REGIONS; i++) {
 949        if (!d->io_regions[i].size) {
 950            continue;
 951        }
 952
 953        reg = &rp->reg[reg_idx++];
 954
 955        reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i)));
 956        if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
 957            reg->phys_hi |= cpu_to_be32(b_ss(1));
 958        } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
 959            reg->phys_hi |= cpu_to_be32(b_ss(3));
 960        } else {
 961            reg->phys_hi |= cpu_to_be32(b_ss(2));
 962        }
 963        reg->phys_mid = 0;
 964        reg->phys_lo = 0;
 965        reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32);
 966        reg->size_lo = cpu_to_be32(d->io_regions[i].size);
 967
 968        if (d->io_regions[i].addr == PCI_BAR_UNMAPPED) {
 969            continue;
 970        }
 971
 972        assigned = &rp->assigned[assigned_idx++];
 973        assigned->phys_hi = cpu_to_be32(be32_to_cpu(reg->phys_hi) | b_n(1));
 974        assigned->phys_mid = cpu_to_be32(d->io_regions[i].addr >> 32);
 975        assigned->phys_lo = cpu_to_be32(d->io_regions[i].addr);
 976        assigned->size_hi = reg->size_hi;
 977        assigned->size_lo = reg->size_lo;
 978    }
 979
 980    rp->reg_len = reg_idx * sizeof(ResourceFields);
 981    rp->assigned_len = assigned_idx * sizeof(ResourceFields);
 982}
 983
/*
 * Lookup tables used by pci_find_device_name() to turn a PCI class code
 * (class / subclass / programming interface) into a device-tree node name.
 */
typedef struct PCIClass PCIClass;
typedef struct PCISubClass PCISubClass;
typedef struct PCIIFace PCIIFace;

/* One programming-interface name; a table ends with an iface of 0xFF. */
struct PCIIFace {
    int iface;          /* only the low byte is compared by the lookup */
    const char *name;   /* node name to use when the interface matches */
};

/* One subclass name; a table ends with a subclass of 0xFF. */
struct PCISubClass {
    int subclass;           /* only the low byte is compared by the lookup */
    const char *name;       /* node name to use when the subclass matches */
    const PCIIFace *iface;  /* optional per-interface names, or NULL */
};

/* One entry per PCI base class; pci_classes[] is indexed by class code. */
struct PCIClass {
    const char *name;         /* name used when no subclass entry matches */
    const PCISubClass *subc;  /* subclass table, or NULL if there is none */
};
1003
1004static const PCISubClass undef_subclass[] = {
1005    { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
1006    { 0xFF, NULL, NULL },
1007};
1008
1009static const PCISubClass mass_subclass[] = {
1010    { PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
1011    { PCI_CLASS_STORAGE_IDE, "ide", NULL },
1012    { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
1013    { PCI_CLASS_STORAGE_IPI, "ipi", NULL },
1014    { PCI_CLASS_STORAGE_RAID, "raid", NULL },
1015    { PCI_CLASS_STORAGE_ATA, "ata", NULL },
1016    { PCI_CLASS_STORAGE_SATA, "sata", NULL },
1017    { PCI_CLASS_STORAGE_SAS, "sas", NULL },
1018    { 0xFF, NULL, NULL },
1019};
1020
1021static const PCISubClass net_subclass[] = {
1022    { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
1023    { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
1024    { PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
1025    { PCI_CLASS_NETWORK_ATM, "atm", NULL },
1026    { PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
1027    { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
1028    { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
1029    { 0xFF, NULL, NULL },
1030};
1031
1032static const PCISubClass displ_subclass[] = {
1033    { PCI_CLASS_DISPLAY_VGA, "vga", NULL },
1034    { PCI_CLASS_DISPLAY_XGA, "xga", NULL },
1035    { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
1036    { 0xFF, NULL, NULL },
1037};
1038
1039static const PCISubClass media_subclass[] = {
1040    { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
1041    { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
1042    { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
1043    { 0xFF, NULL, NULL },
1044};
1045
1046static const PCISubClass mem_subclass[] = {
1047    { PCI_CLASS_MEMORY_RAM, "memory", NULL },
1048    { PCI_CLASS_MEMORY_FLASH, "flash", NULL },
1049    { 0xFF, NULL, NULL },
1050};
1051
1052static const PCISubClass bridg_subclass[] = {
1053    { PCI_CLASS_BRIDGE_HOST, "host", NULL },
1054    { PCI_CLASS_BRIDGE_ISA, "isa", NULL },
1055    { PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
1056    { PCI_CLASS_BRIDGE_MC, "mca", NULL },
1057    { PCI_CLASS_BRIDGE_PCI, "pci", NULL },
1058    { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
1059    { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
1060    { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
1061    { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
1062    { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
1063    { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
1064    { 0xFF, NULL, NULL },
1065};
1066
1067static const PCISubClass comm_subclass[] = {
1068    { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
1069    { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
1070    { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
1071    { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
1072    { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
1073    { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
1074    { 0xFF, NULL, NULL, },
1075};
1076
1077static const PCIIFace pic_iface[] = {
1078    { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
1079    { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
1080    { 0xFF, NULL },
1081};
1082
1083static const PCISubClass sys_subclass[] = {
1084    { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
1085    { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
1086    { PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
1087    { PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
1088    { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
1089    { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
1090    { 0xFF, NULL, NULL },
1091};
1092
1093static const PCISubClass inp_subclass[] = {
1094    { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
1095    { PCI_CLASS_INPUT_PEN, "pen", NULL },
1096    { PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
1097    { PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
1098    { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
1099    { 0xFF, NULL, NULL },
1100};
1101
1102static const PCISubClass dock_subclass[] = {
1103    { PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
1104    { 0xFF, NULL, NULL },
1105};
1106
1107static const PCISubClass cpu_subclass[] = {
1108    { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
1109    { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
1110    { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
1111    { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
1112    { 0xFF, NULL, NULL },
1113};
1114
1115static const PCIIFace usb_iface[] = {
1116    { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
1117    { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
1118    { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
1119    { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
1120    { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
1121    { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
1122    { 0xFF, NULL },
1123};
1124
1125static const PCISubClass ser_subclass[] = {
1126    { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
1127    { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
1128    { PCI_CLASS_SERIAL_SSA, "ssa", NULL },
1129    { PCI_CLASS_SERIAL_USB, "usb", usb_iface },
1130    { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
1131    { PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
1132    { PCI_CLASS_SERIAL_IB, "infiniband", NULL },
1133    { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
1134    { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
1135    { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
1136    { 0xFF, NULL, NULL },
1137};
1138
1139static const PCISubClass wrl_subclass[] = {
1140    { PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
1141    { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
1142    { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
1143    { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
1144    { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
1145    { 0xFF, NULL, NULL },
1146};
1147
1148static const PCISubClass sat_subclass[] = {
1149    { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
1150    { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
1151    { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
1152    { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
1153    { 0xFF, NULL, NULL },
1154};
1155
1156static const PCISubClass crypt_subclass[] = {
1157    { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
1158    { PCI_CLASS_CRYPT_ENTERTAINMENT,
1159      "entertainment-encryption", NULL },
1160    { 0xFF, NULL, NULL },
1161};
1162
1163static const PCISubClass spc_subclass[] = {
1164    { PCI_CLASS_SP_DPIO, "dpio", NULL },
1165    { PCI_CLASS_SP_PERF, "counter", NULL },
1166    { PCI_CLASS_SP_SYNCH, "measurement", NULL },
1167    { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
1168    { 0xFF, NULL, NULL },
1169};
1170
1171static const PCIClass pci_classes[] = {
1172    { "legacy-device", undef_subclass },
1173    { "mass-storage",  mass_subclass },
1174    { "network", net_subclass },
1175    { "display", displ_subclass, },
1176    { "multimedia-device", media_subclass },
1177    { "memory-controller", mem_subclass },
1178    { "unknown-bridge", bridg_subclass },
1179    { "communication-controller", comm_subclass},
1180    { "system-peripheral", sys_subclass },
1181    { "input-controller", inp_subclass },
1182    { "docking-station", dock_subclass },
1183    { "cpu", cpu_subclass },
1184    { "serial-bus", ser_subclass },
1185    { "wireless-controller", wrl_subclass },
1186    { "intelligent-io", NULL },
1187    { "satellite-device", sat_subclass },
1188    { "encryption", crypt_subclass },
1189    { "data-processing-controller", spc_subclass },
1190};
1191
1192static const char *pci_find_device_name(uint8_t class, uint8_t subclass,
1193                                        uint8_t iface)
1194{
1195    const PCIClass *pclass;
1196    const PCISubClass *psubclass;
1197    const PCIIFace *piface;
1198    const char *name;
1199
1200    if (class >= ARRAY_SIZE(pci_classes)) {
1201        return "pci";
1202    }
1203
1204    pclass = pci_classes + class;
1205    name = pclass->name;
1206
1207    if (pclass->subc == NULL) {
1208        return name;
1209    }
1210
1211    psubclass = pclass->subc;
1212    while ((psubclass->subclass & 0xff) != 0xff) {
1213        if ((psubclass->subclass & 0xff) == subclass) {
1214            name = psubclass->name;
1215            break;
1216        }
1217        psubclass++;
1218    }
1219
1220    piface = psubclass->iface;
1221    if (piface == NULL) {
1222        return name;
1223    }
1224    while ((piface->iface & 0xff) != 0xff) {
1225        if ((piface->iface & 0xff) == iface) {
1226            name = piface->name;
1227            break;
1228        }
1229        piface++;
1230    }
1231
1232    return name;
1233}
1234
1235static gchar *pci_get_node_name(PCIDevice *dev)
1236{
1237    int slot = PCI_SLOT(dev->devfn);
1238    int func = PCI_FUNC(dev->devfn);
1239    uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
1240    const char *name;
1241
1242    name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff,
1243                                ccode & 0xff);
1244
1245    if (func != 0) {
1246        return g_strdup_printf("%s@%x,%x", name, slot, func);
1247    } else {
1248        return g_strdup_printf("%s@%x", name, slot);
1249    }
1250}
1251
1252static uint32_t spapr_phb_get_pci_drc_index(SpaprPhbState *phb,
1253                                            PCIDevice *pdev);
1254
1255static void spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
1256                                       SpaprPhbState *sphb)
1257{
1258    ResourceProps rp;
1259    bool is_bridge = false;
1260    int pci_status;
1261    char *buf = NULL;
1262    uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
1263    uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
1264    uint32_t max_msi, max_msix;
1265
1266    if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
1267        PCI_HEADER_TYPE_BRIDGE) {
1268        is_bridge = true;
1269    }
1270
1271    /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */
1272    _FDT(fdt_setprop_cell(fdt, offset, "vendor-id",
1273                          pci_default_read_config(dev, PCI_VENDOR_ID, 2)));
1274    _FDT(fdt_setprop_cell(fdt, offset, "device-id",
1275                          pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
1276    _FDT(fdt_setprop_cell(fdt, offset, "revision-id",
1277                          pci_default_read_config(dev, PCI_REVISION_ID, 1)));
1278    _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
1279    if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
1280        _FDT(fdt_setprop_cell(fdt, offset, "interrupts",
1281                 pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
1282    }
1283
1284    if (!is_bridge) {
1285        _FDT(fdt_setprop_cell(fdt, offset, "min-grant",
1286            pci_default_read_config(dev, PCI_MIN_GNT, 1)));
1287        _FDT(fdt_setprop_cell(fdt, offset, "max-latency",
1288            pci_default_read_config(dev, PCI_MAX_LAT, 1)));
1289    }
1290
1291    if (pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)) {
1292        _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id",
1293                 pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)));
1294    }
1295
1296    if (pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)) {
1297        _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id",
1298                 pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)));
1299    }
1300
1301    _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size",
1302        pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1)));
1303
1304    /* the following fdt cells are masked off the pci status register */
1305    pci_status = pci_default_read_config(dev, PCI_STATUS, 2);
1306    _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed",
1307                          PCI_STATUS_DEVSEL_MASK & pci_status));
1308
1309    if (pci_status & PCI_STATUS_FAST_BACK) {
1310        _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0));
1311    }
1312    if (pci_status & PCI_STATUS_66MHZ) {
1313        _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0));
1314    }
1315    if (pci_status & PCI_STATUS_UDF) {
1316        _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
1317    }
1318
1319    _FDT(fdt_setprop_string(fdt, offset, "name",
1320                            pci_find_device_name((ccode >> 16) & 0xff,
1321                                                 (ccode >> 8) & 0xff,
1322                                                 ccode & 0xff)));
1323
1324    buf = spapr_phb_get_loc_code(sphb, dev);
1325    _FDT(fdt_setprop_string(fdt, offset, "ibm,loc-code", buf));
1326    g_free(buf);
1327
1328    if (drc_index) {
1329        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
1330    }
1331
1332    _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
1333                          RESOURCE_CELLS_ADDRESS));
1334    _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
1335                          RESOURCE_CELLS_SIZE));
1336
1337    if (msi_present(dev)) {
1338        max_msi = msi_nr_vectors_allocated(dev);
1339        if (max_msi) {
1340            _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi));
1341        }
1342    }
1343    if (msix_present(dev)) {
1344        max_msix = dev->msix_entries_nr;
1345        if (max_msix) {
1346            _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix));
1347        }
1348    }
1349
1350    populate_resource_props(dev, &rp);
1351    _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len));
1352    _FDT(fdt_setprop(fdt, offset, "assigned-addresses",
1353                     (uint8_t *)rp.assigned, rp.assigned_len));
1354
1355    if (sphb->pcie_ecs && pci_is_express(dev)) {
1356        _FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1));
1357    }
1358}
1359
1360/* create OF node for pci device and required OF DT properties */
1361static int spapr_create_pci_child_dt(SpaprPhbState *phb, PCIDevice *dev,
1362                                     void *fdt, int node_offset)
1363{
1364    int offset;
1365    gchar *nodename;
1366
1367    nodename = pci_get_node_name(dev);
1368    _FDT(offset = fdt_add_subnode(fdt, node_offset, nodename));
1369    g_free(nodename);
1370
1371    spapr_populate_pci_child_dt(dev, fdt, offset, phb);
1372
1373    return offset;
1374}
1375
1376/* Callback to be called during DRC release. */
1377void spapr_phb_remove_pci_device_cb(DeviceState *dev)
1378{
1379    HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
1380
1381    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
1382    object_unparent(OBJECT(dev));
1383}
1384
1385static SpaprDrc *spapr_phb_get_pci_func_drc(SpaprPhbState *phb,
1386                                                    uint32_t busnr,
1387                                                    int32_t devfn)
1388{
1389    return spapr_drc_by_id(TYPE_SPAPR_DRC_PCI,
1390                           (phb->index << 16) | (busnr << 8) | devfn);
1391}
1392
1393static SpaprDrc *spapr_phb_get_pci_drc(SpaprPhbState *phb,
1394                                               PCIDevice *pdev)
1395{
1396    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
1397    return spapr_phb_get_pci_func_drc(phb, busnr, pdev->devfn);
1398}
1399
1400static uint32_t spapr_phb_get_pci_drc_index(SpaprPhbState *phb,
1401                                            PCIDevice *pdev)
1402{
1403    SpaprDrc *drc = spapr_phb_get_pci_drc(phb, pdev);
1404
1405    if (!drc) {
1406        return 0;
1407    }
1408
1409    return spapr_drc_index(drc);
1410}
1411
1412int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
1413                          void *fdt, int *fdt_start_offset, Error **errp)
1414{
1415    HotplugHandler *plug_handler = qdev_get_hotplug_handler(drc->dev);
1416    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(plug_handler);
1417    PCIDevice *pdev = PCI_DEVICE(drc->dev);
1418
1419    *fdt_start_offset = spapr_create_pci_child_dt(sphb, pdev, fdt, 0);
1420    return 0;
1421}
1422
1423static void spapr_pci_plug(HotplugHandler *plug_handler,
1424                           DeviceState *plugged_dev, Error **errp)
1425{
1426    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1427    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1428    SpaprDrc *drc = spapr_phb_get_pci_drc(phb, pdev);
1429    Error *local_err = NULL;
1430    PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
1431    uint32_t slotnr = PCI_SLOT(pdev->devfn);
1432
1433    /* if DR is disabled we don't need to do anything in the case of
1434     * hotplug or coldplug callbacks
1435     */
1436    if (!phb->dr_enabled) {
1437        /* if this is a hotplug operation initiated by the user
1438         * we need to let them know it's not enabled
1439         */
1440        if (plugged_dev->hotplugged) {
1441            error_setg(&local_err, QERR_BUS_NO_HOTPLUG,
1442                       object_get_typename(OBJECT(phb)));
1443        }
1444        goto out;
1445    }
1446
1447    g_assert(drc);
1448
1449    /* Following the QEMU convention used for PCIe multifunction
1450     * hotplug, we do not allow functions to be hotplugged to a
1451     * slot that already has function 0 present
1452     */
1453    if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
1454        PCI_FUNC(pdev->devfn) != 0) {
1455        error_setg(&local_err, "PCI: slot %d function 0 already ocuppied by %s,"
1456                   " additional functions can no longer be exposed to guest.",
1457                   slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name);
1458        goto out;
1459    }
1460
1461    spapr_drc_attach(drc, DEVICE(pdev), &local_err);
1462    if (local_err) {
1463        goto out;
1464    }
1465
1466    /* If this is function 0, signal hotplug for all the device functions.
1467     * Otherwise defer sending the hotplug event.
1468     */
1469    if (!spapr_drc_hotplugged(plugged_dev)) {
1470        spapr_drc_reset(drc);
1471    } else if (PCI_FUNC(pdev->devfn) == 0) {
1472        int i;
1473
1474        for (i = 0; i < 8; i++) {
1475            SpaprDrc *func_drc;
1476            SpaprDrcClass *func_drck;
1477            SpaprDREntitySense state;
1478
1479            func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
1480                                                  PCI_DEVFN(slotnr, i));
1481            func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1482            state = func_drck->dr_entity_sense(func_drc);
1483
1484            if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
1485                spapr_hotplug_req_add_by_index(func_drc);
1486            }
1487        }
1488    }
1489
1490out:
1491    error_propagate(errp, local_err);
1492}
1493
1494static void spapr_pci_unplug(HotplugHandler *plug_handler,
1495                             DeviceState *plugged_dev, Error **errp)
1496{
1497    /* some version guests do not wait for completion of a device
1498     * cleanup (generally done asynchronously by the kernel) before
1499     * signaling to QEMU that the device is safe, but instead sleep
1500     * for some 'safe' period of time. unfortunately on a busy host
1501     * this sleep isn't guaranteed to be long enough, resulting in
1502     * bad things like IRQ lines being left asserted during final
1503     * device removal. to deal with this we call reset just prior
1504     * to finalizing the device, which will put the device back into
1505     * an 'idle' state, as the device cleanup code expects.
1506     */
1507    pci_device_reset(PCI_DEVICE(plugged_dev));
1508    object_property_set_bool(OBJECT(plugged_dev), false, "realized", NULL);
1509}
1510
1511static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
1512                                     DeviceState *plugged_dev, Error **errp)
1513{
1514    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1515    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1516    SpaprDrc *drc = spapr_phb_get_pci_drc(phb, pdev);
1517
1518    if (!phb->dr_enabled) {
1519        error_setg(errp, QERR_BUS_NO_HOTPLUG,
1520                   object_get_typename(OBJECT(phb)));
1521        return;
1522    }
1523
1524    g_assert(drc);
1525    g_assert(drc->dev == plugged_dev);
1526
1527    if (!spapr_drc_unplug_requested(drc)) {
1528        PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
1529        uint32_t slotnr = PCI_SLOT(pdev->devfn);
1530        SpaprDrc *func_drc;
1531        SpaprDrcClass *func_drck;
1532        SpaprDREntitySense state;
1533        int i;
1534
1535        /* ensure any other present functions are pending unplug */
1536        if (PCI_FUNC(pdev->devfn) == 0) {
1537            for (i = 1; i < 8; i++) {
1538                func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
1539                                                      PCI_DEVFN(slotnr, i));
1540                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1541                state = func_drck->dr_entity_sense(func_drc);
1542                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
1543                    && !spapr_drc_unplug_requested(func_drc)) {
1544                    error_setg(errp,
1545                               "PCI: slot %d, function %d still present. "
1546                               "Must unplug all non-0 functions first.",
1547                               slotnr, i);
1548                    return;
1549                }
1550            }
1551        }
1552
1553        spapr_drc_detach(drc);
1554
1555        /* if this isn't func 0, defer unplug event. otherwise signal removal
1556         * for all present functions
1557         */
1558        if (PCI_FUNC(pdev->devfn) == 0) {
1559            for (i = 7; i >= 0; i--) {
1560                func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
1561                                                      PCI_DEVFN(slotnr, i));
1562                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1563                state = func_drck->dr_entity_sense(func_drc);
1564                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
1565                    spapr_hotplug_req_remove_by_index(func_drc);
1566                }
1567            }
1568        }
1569    }
1570}
1571
1572static void spapr_phb_finalizefn(Object *obj)
1573{
1574    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(obj);
1575
1576    g_free(sphb->dtbusname);
1577    sphb->dtbusname = NULL;
1578}
1579
/*
 * Unrealize a sPAPR PCI host bridge.
 *
 * Tears down, in order: the MSI hash table, the TCE IOMMU subregions of
 * each supported DMA window, the per-function DRCs (when DR is enabled),
 * the LSI interrupts, the PHB's list linkage, the MSI window, the IOMMU
 * address space, the root bus and finally the IO/MMIO windows mapped
 * into system memory.  @errp is not used by this function.
 */
static void spapr_phb_unrealize(DeviceState *dev, Error **errp)
{
    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
    SysBusDevice *s = SYS_BUS_DEVICE(dev);
    PCIHostState *phb = PCI_HOST_BRIDGE(s);
    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(phb);
    SpaprTceTable *tcet;
    int i;
    const unsigned windows_supported = spapr_phb_windows_supported(sphb);

    /* drop our reference on the MSI table, if one was created */
    if (sphb->msi) {
        g_hash_table_unref(sphb->msi);
        sphb->msi = NULL;
    }

    /*
     * Remove IO/MMIO subregions and aliases, rest should get cleaned
     * via PHB's unrealize->object_finalize
     */
    for (i = windows_supported - 1; i >= 0; i--) {
        /* windows without a TCE table for their LIOBN are skipped */
        tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
        if (tcet) {
            memory_region_del_subregion(&sphb->iommu_root,
                                        spapr_tce_get_iommu(tcet));
        }
    }

    if (sphb->dr_enabled) {
        /* one possible DRC per devfn: PCI_SLOT_MAX slots x 8 functions */
        for (i = PCI_SLOT_MAX * 8 - 1; i >= 0; i--) {
            SpaprDrc *drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PCI,
                                                    (sphb->index << 16) | i);

            if (drc) {
                object_unparent(OBJECT(drc));
            }
        }
    }

    /* release every LSI that has an irq number recorded */
    for (i = PCI_NUM_PINS - 1; i >= 0; i--) {
        if (sphb->lsi_table[i].irq) {
            spapr_irq_free(spapr, sphb->lsi_table[i].irq, 1);
            sphb->lsi_table[i].irq = 0;
        }
    }

    QLIST_REMOVE(sphb, list);

    memory_region_del_subregion(&sphb->iommu_root, &sphb->msiwindow);

    address_space_destroy(&sphb->iommu_as);

    /* clear the bus hotplug handler before unregistering the root bus */
    qbus_set_hotplug_handler(BUS(phb->bus), NULL, &error_abort);
    pci_unregister_root_bus(phb->bus);

    memory_region_del_subregion(get_system_memory(), &sphb->iowindow);
    /* the 64-bit window is only removed when its PCI address is not -1 */
    if (sphb->mem64_win_pciaddr != (hwaddr)-1) {
        memory_region_del_subregion(get_system_memory(), &sphb->mem64window);
    }
    memory_region_del_subregion(get_system_memory(), &sphb->mem32window);
}
1640
1641static bool spapr_phb_allows_extended_config_space(PCIBus *bus)
1642{
1643    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(BUS(bus)->parent);
1644
1645    return sphb->pcie_ecs;
1646}
1647
1648static void spapr_phb_root_bus_class_init(ObjectClass *klass, void *data)
1649{
1650    PCIBusClass *pbc = PCI_BUS_CLASS(klass);
1651
1652    pbc->allows_extended_config_space = spapr_phb_allows_extended_config_space;
1653}
1654
/* QOM type name of the root bus created under each sPAPR PHB */
#define TYPE_SPAPR_PHB_ROOT_BUS "pci"

/*
 * Type description for the PHB root bus: a TYPE_PCI_BUS subtype whose
 * class_init installs the extended-config-space hook.
 */
static const TypeInfo spapr_phb_root_bus_info = {
    .name = TYPE_SPAPR_PHB_ROOT_BUS,
    .parent = TYPE_PCI_BUS,
    .class_init = spapr_phb_root_bus_class_init,
};
1662
1663static void spapr_phb_realize(DeviceState *dev, Error **errp)
1664{
1665    /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user
1666     * tries to add a sPAPR PHB to a non-pseries machine.
1667     */
1668    SpaprMachineState *spapr =
1669        (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(),
1670                                                  TYPE_SPAPR_MACHINE);
1671    SpaprMachineClass *smc = spapr ? SPAPR_MACHINE_GET_CLASS(spapr) : NULL;
1672    SysBusDevice *s = SYS_BUS_DEVICE(dev);
1673    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
1674    PCIHostState *phb = PCI_HOST_BRIDGE(s);
1675    char *namebuf;
1676    int i;
1677    PCIBus *bus;
1678    uint64_t msi_window_size = 4096;
1679    SpaprTceTable *tcet;
1680    const unsigned windows_supported = spapr_phb_windows_supported(sphb);
1681
1682    if (!spapr) {
1683        error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine");
1684        return;
1685    }
1686
1687    assert(sphb->index != (uint32_t)-1); /* checked in spapr_phb_pre_plug() */
1688
1689    if (sphb->mem64_win_size != 0) {
1690        if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
1691            error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx
1692                       " (max 2 GiB)", sphb->mem_win_size);
1693            return;
1694        }
1695
1696        /* 64-bit window defaults to identity mapping */
1697        sphb->mem64_win_pciaddr = sphb->mem64_win_addr;
1698    } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
1699        /*
1700         * For compatibility with old configuration, if no 64-bit MMIO
1701         * window is specified, but the ordinary (32-bit) memory
1702         * window is specified as > 2GiB, we treat it as a 2GiB 32-bit
1703         * window, with a 64-bit MMIO window following on immediately
1704         * afterwards
1705         */
1706        sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE;
1707        sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE;
1708        sphb->mem64_win_pciaddr =
1709            SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE;
1710        sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE;
1711    }
1712
1713    if (spapr_pci_find_phb(spapr, sphb->buid)) {
1714        error_setg(errp, "PCI host bridges must have unique BUIDs");
1715        return;
1716    }
1717
1718    if (sphb->numa_node != -1 &&
1719        (sphb->numa_node >= MAX_NODES || !numa_info[sphb->numa_node].present)) {
1720        error_setg(errp, "Invalid NUMA node ID for PCI host bridge");
1721        return;
1722    }
1723
1724    sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
1725
1726    /* Initialize memory regions */
1727    namebuf = g_strdup_printf("%s.mmio", sphb->dtbusname);
1728    memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
1729    g_free(namebuf);
1730
1731    namebuf = g_strdup_printf("%s.mmio32-alias", sphb->dtbusname);
1732    memory_region_init_alias(&sphb->mem32window, OBJECT(sphb),
1733                             namebuf, &sphb->memspace,
1734                             SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
1735    g_free(namebuf);
1736    memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
1737                                &sphb->mem32window);
1738
1739    if (sphb->mem64_win_size != 0) {
1740        namebuf = g_strdup_printf("%s.mmio64-alias", sphb->dtbusname);
1741        memory_region_init_alias(&sphb->mem64window, OBJECT(sphb),
1742                                 namebuf, &sphb->memspace,
1743                                 sphb->mem64_win_pciaddr, sphb->mem64_win_size);
1744        g_free(namebuf);
1745
1746        memory_region_add_subregion(get_system_memory(),
1747                                    sphb->mem64_win_addr,
1748                                    &sphb->mem64window);
1749    }
1750
1751    /* Initialize IO regions */
1752    namebuf = g_strdup_printf("%s.io", sphb->dtbusname);
1753    memory_region_init(&sphb->iospace, OBJECT(sphb),
1754                       namebuf, SPAPR_PCI_IO_WIN_SIZE);
1755    g_free(namebuf);
1756
1757    namebuf = g_strdup_printf("%s.io-alias", sphb->dtbusname);
1758    memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
1759                             &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
1760    g_free(namebuf);
1761    memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
1762                                &sphb->iowindow);
1763
1764    bus = pci_register_root_bus(dev, NULL,
1765                                pci_spapr_set_irq, pci_spapr_map_irq, sphb,
1766                                &sphb->memspace, &sphb->iospace,
1767                                PCI_DEVFN(0, 0), PCI_NUM_PINS,
1768                                TYPE_SPAPR_PHB_ROOT_BUS);
1769    phb->bus = bus;
1770    qbus_set_hotplug_handler(BUS(phb->bus), OBJECT(sphb), NULL);
1771
1772    /*
1773     * Initialize PHB address space.
1774     * By default there will be at least one subregion for default
1775     * 32bit DMA window.
1776     * Later the guest might want to create another DMA window
1777     * which will become another memory subregion.
1778     */
1779    namebuf = g_strdup_printf("%s.iommu-root", sphb->dtbusname);
1780    memory_region_init(&sphb->iommu_root, OBJECT(sphb),
1781                       namebuf, UINT64_MAX);
1782    g_free(namebuf);
1783    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
1784                       sphb->dtbusname);
1785
1786    /*
1787     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
1788     * we need to allocate some memory to catch those writes coming
1789     * from msi_notify()/msix_notify().
1790     * As MSIMessage:addr is going to be the same and MSIMessage:data
1791     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
1792     * be used.
1793     *
1794     * For KVM we want to ensure that this memory is a full page so that
1795     * our memory slot is of page size granularity.
1796     */
1797#ifdef CONFIG_KVM
1798    if (kvm_enabled()) {
1799        msi_window_size = getpagesize();
1800    }
1801#endif
1802
1803    memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), &spapr_msi_ops, spapr,
1804                          "msi", msi_window_size);
1805    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
1806                                &sphb->msiwindow);
1807
1808    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
1809
1810    pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
1811
1812    QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
1813
1814    /* Initialize the LSI table */
1815    for (i = 0; i < PCI_NUM_PINS; i++) {
1816        uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
1817        Error *local_err = NULL;
1818
1819        if (smc->legacy_irq_allocation) {
1820            irq = spapr_irq_findone(spapr, &local_err);
1821            if (local_err) {
1822                error_propagate_prepend(errp, local_err,
1823                                        "can't allocate LSIs: ");
1824                /*
1825                 * Older machines will never support PHB hotplug, ie, this is an
1826                 * init only path and QEMU will terminate. No need to rollback.
1827                 */
1828                return;
1829            }
1830        }
1831
1832        spapr_irq_claim(spapr, irq, true, &local_err);
1833        if (local_err) {
1834            error_propagate_prepend(errp, local_err, "can't allocate LSIs: ");
1835            goto unrealize;
1836        }
1837
1838        sphb->lsi_table[i].irq = irq;
1839    }
1840
1841    /* allocate connectors for child PCI devices */
1842    if (sphb->dr_enabled) {
1843        for (i = 0; i < PCI_SLOT_MAX * 8; i++) {
1844            spapr_dr_connector_new(OBJECT(phb), TYPE_SPAPR_DRC_PCI,
1845                                   (sphb->index << 16) | i);
1846        }
1847    }
1848
1849    /* DMA setup */
1850    for (i = 0; i < windows_supported; ++i) {
1851        tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]);
1852        if (!tcet) {
1853            error_setg(errp, "Creating window#%d failed for %s",
1854                       i, sphb->dtbusname);
1855            goto unrealize;
1856        }
1857        memory_region_add_subregion(&sphb->iommu_root, 0,
1858                                    spapr_tce_get_iommu(tcet));
1859    }
1860
1861    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
1862    return;
1863
1864unrealize:
1865    spapr_phb_unrealize(dev, NULL);
1866}
1867
1868static int spapr_phb_children_reset(Object *child, void *opaque)
1869{
1870    DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
1871
1872    if (dev) {
1873        device_reset(dev);
1874    }
1875
1876    return 0;
1877}
1878
1879void spapr_phb_dma_reset(SpaprPhbState *sphb)
1880{
1881    int i;
1882    SpaprTceTable *tcet;
1883
1884    for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) {
1885        tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
1886
1887        if (tcet && tcet->nb_table) {
1888            spapr_tce_table_disable(tcet);
1889        }
1890    }
1891
1892    /* Register default 32bit DMA window */
1893    tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]);
1894    spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
1895                           sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT);
1896}
1897
1898static void spapr_phb_reset(DeviceState *qdev)
1899{
1900    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
1901
1902    spapr_phb_dma_reset(sphb);
1903
1904    /* Reset the IOMMU state */
1905    object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
1906
1907    if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) {
1908        spapr_phb_vfio_reset(qdev);
1909    }
1910}
1911
1912static Property spapr_phb_properties[] = {
1913    DEFINE_PROP_UINT32("index", SpaprPhbState, index, -1),
1914    DEFINE_PROP_UINT64("mem_win_size", SpaprPhbState, mem_win_size,
1915                       SPAPR_PCI_MEM32_WIN_SIZE),
1916    DEFINE_PROP_UINT64("mem64_win_size", SpaprPhbState, mem64_win_size,
1917                       SPAPR_PCI_MEM64_WIN_SIZE),
1918    DEFINE_PROP_UINT64("io_win_size", SpaprPhbState, io_win_size,
1919                       SPAPR_PCI_IO_WIN_SIZE),
1920    DEFINE_PROP_BOOL("dynamic-reconfiguration", SpaprPhbState, dr_enabled,
1921                     true),
1922    /* Default DMA window is 0..1GB */
1923    DEFINE_PROP_UINT64("dma_win_addr", SpaprPhbState, dma_win_addr, 0),
1924    DEFINE_PROP_UINT64("dma_win_size", SpaprPhbState, dma_win_size, 0x40000000),
1925    DEFINE_PROP_UINT64("dma64_win_addr", SpaprPhbState, dma64_win_addr,
1926                       0x800000000000000ULL),
1927    DEFINE_PROP_BOOL("ddw", SpaprPhbState, ddw_enabled, true),
1928    DEFINE_PROP_UINT64("pgsz", SpaprPhbState, page_size_mask,
1929                       (1ULL << 12) | (1ULL << 16)),
1930    DEFINE_PROP_UINT32("numa_node", SpaprPhbState, numa_node, -1),
1931    DEFINE_PROP_BOOL("pre-2.8-migration", SpaprPhbState,
1932                     pre_2_8_migration, false),
1933    DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState,
1934                     pcie_ecs, true),
1935    DEFINE_PROP_END_OF_LIST(),
1936};
1937
1938static const VMStateDescription vmstate_spapr_pci_lsi = {
1939    .name = "spapr_pci/lsi",
1940    .version_id = 1,
1941    .minimum_version_id = 1,
1942    .fields = (VMStateField[]) {
1943        VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi, NULL),
1944
1945        VMSTATE_END_OF_LIST()
1946    },
1947};
1948
1949static const VMStateDescription vmstate_spapr_pci_msi = {
1950    .name = "spapr_pci/msi",
1951    .version_id = 1,
1952    .minimum_version_id = 1,
1953    .fields = (VMStateField []) {
1954        VMSTATE_UINT32(key, spapr_pci_msi_mig),
1955        VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
1956        VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
1957        VMSTATE_END_OF_LIST()
1958    },
1959};
1960
1961static int spapr_pci_pre_save(void *opaque)
1962{
1963    SpaprPhbState *sphb = opaque;
1964    GHashTableIter iter;
1965    gpointer key, value;
1966    int i;
1967
1968    if (sphb->pre_2_8_migration) {
1969        sphb->mig_liobn = sphb->dma_liobn[0];
1970        sphb->mig_mem_win_addr = sphb->mem_win_addr;
1971        sphb->mig_mem_win_size = sphb->mem_win_size;
1972        sphb->mig_io_win_addr = sphb->io_win_addr;
1973        sphb->mig_io_win_size = sphb->io_win_size;
1974
1975        if ((sphb->mem64_win_size != 0)
1976            && (sphb->mem64_win_addr
1977                == (sphb->mem_win_addr + sphb->mem_win_size))) {
1978            sphb->mig_mem_win_size += sphb->mem64_win_size;
1979        }
1980    }
1981
1982    g_free(sphb->msi_devs);
1983    sphb->msi_devs = NULL;
1984    sphb->msi_devs_num = g_hash_table_size(sphb->msi);
1985    if (!sphb->msi_devs_num) {
1986        return 0;
1987    }
1988    sphb->msi_devs = g_new(spapr_pci_msi_mig, sphb->msi_devs_num);
1989
1990    g_hash_table_iter_init(&iter, sphb->msi);
1991    for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
1992        sphb->msi_devs[i].key = *(uint32_t *) key;
1993        sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
1994    }
1995
1996    return 0;
1997}
1998
1999static int spapr_pci_post_load(void *opaque, int version_id)
2000{
2001    SpaprPhbState *sphb = opaque;
2002    gpointer key, value;
2003    int i;
2004
2005    for (i = 0; i < sphb->msi_devs_num; ++i) {
2006        key = g_memdup(&sphb->msi_devs[i].key,
2007                       sizeof(sphb->msi_devs[i].key));
2008        value = g_memdup(&sphb->msi_devs[i].value,
2009                         sizeof(sphb->msi_devs[i].value));
2010        g_hash_table_insert(sphb->msi, key, value);
2011    }
2012    g_free(sphb->msi_devs);
2013    sphb->msi_devs = NULL;
2014    sphb->msi_devs_num = 0;
2015
2016    return 0;
2017}
2018
2019static bool pre_2_8_migration(void *opaque, int version_id)
2020{
2021    SpaprPhbState *sphb = opaque;
2022
2023    return sphb->pre_2_8_migration;
2024}
2025
2026static const VMStateDescription vmstate_spapr_pci = {
2027    .name = "spapr_pci",
2028    .version_id = 2,
2029    .minimum_version_id = 2,
2030    .pre_save = spapr_pci_pre_save,
2031    .post_load = spapr_pci_post_load,
2032    .fields = (VMStateField[]) {
2033        VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL),
2034        VMSTATE_UINT32_TEST(mig_liobn, SpaprPhbState, pre_2_8_migration),
2035        VMSTATE_UINT64_TEST(mig_mem_win_addr, SpaprPhbState, pre_2_8_migration),
2036        VMSTATE_UINT64_TEST(mig_mem_win_size, SpaprPhbState, pre_2_8_migration),
2037        VMSTATE_UINT64_TEST(mig_io_win_addr, SpaprPhbState, pre_2_8_migration),
2038        VMSTATE_UINT64_TEST(mig_io_win_size, SpaprPhbState, pre_2_8_migration),
2039        VMSTATE_STRUCT_ARRAY(lsi_table, SpaprPhbState, PCI_NUM_PINS, 0,
2040                             vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
2041        VMSTATE_INT32(msi_devs_num, SpaprPhbState),
2042        VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, SpaprPhbState, msi_devs_num, 0,
2043                                    vmstate_spapr_pci_msi, spapr_pci_msi_mig),
2044        VMSTATE_END_OF_LIST()
2045    },
2046};
2047
2048static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
2049                                           PCIBus *rootbus)
2050{
2051    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
2052
2053    return sphb->dtbusname;
2054}
2055
2056static void spapr_phb_class_init(ObjectClass *klass, void *data)
2057{
2058    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
2059    DeviceClass *dc = DEVICE_CLASS(klass);
2060    HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);
2061
2062    hc->root_bus_path = spapr_phb_root_bus_path;
2063    dc->realize = spapr_phb_realize;
2064    dc->unrealize = spapr_phb_unrealize;
2065    dc->props = spapr_phb_properties;
2066    dc->reset = spapr_phb_reset;
2067    dc->vmsd = &vmstate_spapr_pci;
2068    /* Supported by TYPE_SPAPR_MACHINE */
2069    dc->user_creatable = true;
2070    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
2071    hp->plug = spapr_pci_plug;
2072    hp->unplug = spapr_pci_unplug;
2073    hp->unplug_request = spapr_pci_unplug_request;
2074}
2075
2076static const TypeInfo spapr_phb_info = {
2077    .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
2078    .parent        = TYPE_PCI_HOST_BRIDGE,
2079    .instance_size = sizeof(SpaprPhbState),
2080    .instance_finalize = spapr_phb_finalizefn,
2081    .class_init    = spapr_phb_class_init,
2082    .interfaces    = (InterfaceInfo[]) {
2083        { TYPE_HOTPLUG_HANDLER },
2084        { }
2085    }
2086};
2087
2088typedef struct SpaprFdt {
2089    void *fdt;
2090    int node_off;
2091    SpaprPhbState *sphb;
2092} SpaprFdt;
2093
2094static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
2095                                          void *opaque)
2096{
2097    PCIBus *sec_bus;
2098    SpaprFdt *p = opaque;
2099    int offset;
2100    SpaprFdt s_fdt;
2101
2102    offset = spapr_create_pci_child_dt(p->sphb, pdev, p->fdt, p->node_off);
2103    if (!offset) {
2104        error_report("Failed to create pci child device tree node");
2105        return;
2106    }
2107
2108    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
2109         PCI_HEADER_TYPE_BRIDGE)) {
2110        return;
2111    }
2112
2113    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
2114    if (!sec_bus) {
2115        return;
2116    }
2117
2118    s_fdt.fdt = p->fdt;
2119    s_fdt.node_off = offset;
2120    s_fdt.sphb = p->sphb;
2121    pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus),
2122                                spapr_populate_pci_devices_dt,
2123                                &s_fdt);
2124}
2125
2126static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
2127                                           void *opaque)
2128{
2129    unsigned int *bus_no = opaque;
2130    PCIBus *sec_bus = NULL;
2131
2132    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
2133         PCI_HEADER_TYPE_BRIDGE)) {
2134        return;
2135    }
2136
2137    (*bus_no)++;
2138    pci_default_write_config(pdev, PCI_PRIMARY_BUS, pci_dev_bus_num(pdev), 1);
2139    pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1);
2140    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
2141
2142    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
2143    if (!sec_bus) {
2144        return;
2145    }
2146
2147    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
2148                        spapr_phb_pci_enumerate_bridge, bus_no);
2149    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
2150}
2151
2152static void spapr_phb_pci_enumerate(SpaprPhbState *phb)
2153{
2154    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
2155    unsigned int bus_no = 0;
2156
2157    pci_for_each_device(bus, pci_bus_num(bus),
2158                        spapr_phb_pci_enumerate_bridge,
2159                        &bus_no);
2160
2161}
2162
2163int spapr_populate_pci_dt(SpaprPhbState *phb, uint32_t intc_phandle, void *fdt,
2164                          uint32_t nr_msis, int *node_offset)
2165{
2166    int bus_off, i, j, ret;
2167    gchar *nodename;
2168    uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
2169    struct {
2170        uint32_t hi;
2171        uint64_t child;
2172        uint64_t parent;
2173        uint64_t size;
2174    } QEMU_PACKED ranges[] = {
2175        {
2176            cpu_to_be32(b_ss(1)), cpu_to_be64(0),
2177            cpu_to_be64(phb->io_win_addr),
2178            cpu_to_be64(memory_region_size(&phb->iospace)),
2179        },
2180        {
2181            cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
2182            cpu_to_be64(phb->mem_win_addr),
2183            cpu_to_be64(phb->mem_win_size),
2184        },
2185        {
2186            cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr),
2187            cpu_to_be64(phb->mem64_win_addr),
2188            cpu_to_be64(phb->mem64_win_size),
2189        },
2190    };
2191    const unsigned sizeof_ranges =
2192        (phb->mem64_win_size ? 3 : 2) * sizeof(ranges[0]);
2193    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
2194    uint32_t interrupt_map_mask[] = {
2195        cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
2196    uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
2197    uint32_t ddw_applicable[] = {
2198        cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW),
2199        cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW),
2200        cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW)
2201    };
2202    uint32_t ddw_extensions[] = {
2203        cpu_to_be32(1),
2204        cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
2205    };
2206    uint32_t associativity[] = {cpu_to_be32(0x4),
2207                                cpu_to_be32(0x0),
2208                                cpu_to_be32(0x0),
2209                                cpu_to_be32(0x0),
2210                                cpu_to_be32(phb->numa_node)};
2211    SpaprTceTable *tcet;
2212    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
2213    SpaprFdt s_fdt;
2214    SpaprDrc *drc;
2215
2216    /* Start populating the FDT */
2217    nodename = g_strdup_printf("pci@%" PRIx64, phb->buid);
2218    _FDT(bus_off = fdt_add_subnode(fdt, 0, nodename));
2219    g_free(nodename);
2220    if (node_offset) {
2221        *node_offset = bus_off;
2222    }
2223
2224    /* Write PHB properties */
2225    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
2226    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
2227    _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
2228    _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
2229    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
2230    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
2231    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
2232    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
2233    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
2234    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
2235    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", nr_msis));
2236
2237    /* Dynamic DMA window */
2238    if (phb->ddw_enabled) {
2239        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable,
2240                         sizeof(ddw_applicable)));
2241        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
2242                         &ddw_extensions, sizeof(ddw_extensions)));
2243    }
2244
2245    /* Advertise NUMA via ibm,associativity */
2246    if (phb->numa_node != -1) {
2247        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
2248                         sizeof(associativity)));
2249    }
2250
2251    /* Build the interrupt-map, this must matches what is done
2252     * in pci_spapr_map_irq
2253     */
2254    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
2255                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
2256    for (i = 0; i < PCI_SLOT_MAX; i++) {
2257        for (j = 0; j < PCI_NUM_PINS; j++) {
2258            uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
2259            int lsi_num = pci_spapr_swizzle(i, j);
2260
2261            irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
2262            irqmap[1] = 0;
2263            irqmap[2] = 0;
2264            irqmap[3] = cpu_to_be32(j+1);
2265            irqmap[4] = cpu_to_be32(intc_phandle);
2266            spapr_dt_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true);
2267        }
2268    }
2269    /* Write interrupt map */
2270    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
2271                     sizeof(interrupt_map)));
2272
2273    tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
2274    if (!tcet) {
2275        return -1;
2276    }
2277    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
2278                 tcet->liobn, tcet->bus_offset,
2279                 tcet->nb_table << tcet->page_shift);
2280
2281    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, phb->index);
2282    if (drc) {
2283        uint32_t drc_index = cpu_to_be32(spapr_drc_index(drc));
2284
2285        _FDT(fdt_setprop(fdt, bus_off, "ibm,my-drc-index", &drc_index,
2286                         sizeof(drc_index)));
2287    }
2288
2289    /* Walk the bridges and program the bus numbers*/
2290    spapr_phb_pci_enumerate(phb);
2291    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
2292
2293    /* Populate tree nodes with PCI devices attached */
2294    s_fdt.fdt = fdt;
2295    s_fdt.node_off = bus_off;
2296    s_fdt.sphb = phb;
2297    pci_for_each_device_reverse(bus, pci_bus_num(bus),
2298                                spapr_populate_pci_devices_dt,
2299                                &s_fdt);
2300
2301    ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
2302                                SPAPR_DR_CONNECTOR_TYPE_PCI);
2303    if (ret) {
2304        return ret;
2305    }
2306
2307    return 0;
2308}
2309
2310void spapr_pci_rtas_init(void)
2311{
2312    spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
2313                        rtas_read_pci_config);
2314    spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
2315                        rtas_write_pci_config);
2316    spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
2317                        rtas_ibm_read_pci_config);
2318    spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
2319                        rtas_ibm_write_pci_config);
2320    if (msi_nonbroken) {
2321        spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
2322                            "ibm,query-interrupt-source-number",
2323                            rtas_ibm_query_interrupt_source_number);
2324        spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
2325                            rtas_ibm_change_msi);
2326    }
2327
2328    spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION,
2329                        "ibm,set-eeh-option",
2330                        rtas_ibm_set_eeh_option);
2331    spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2,
2332                        "ibm,get-config-addr-info2",
2333                        rtas_ibm_get_config_addr_info2);
2334    spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2,
2335                        "ibm,read-slot-reset-state2",
2336                        rtas_ibm_read_slot_reset_state2);
2337    spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET,
2338                        "ibm,set-slot-reset",
2339                        rtas_ibm_set_slot_reset);
2340    spapr_rtas_register(RTAS_IBM_CONFIGURE_PE,
2341                        "ibm,configure-pe",
2342                        rtas_ibm_configure_pe);
2343    spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL,
2344                        "ibm,slot-error-detail",
2345                        rtas_ibm_slot_error_detail);
2346}
2347
2348static void spapr_pci_register_types(void)
2349{
2350    type_register_static(&spapr_phb_info);
2351    type_register_static(&spapr_phb_root_bus_info);
2352}
2353
2354type_init(spapr_pci_register_types)
2355
2356static int spapr_switch_one_vga(DeviceState *dev, void *opaque)
2357{
2358    bool be = *(bool *)opaque;
2359
2360    if (object_dynamic_cast(OBJECT(dev), "VGA")
2361        || object_dynamic_cast(OBJECT(dev), "secondary-vga")) {
2362        object_property_set_bool(OBJECT(dev), be, "big-endian-framebuffer",
2363                                 &error_abort);
2364    }
2365    return 0;
2366}
2367
2368void spapr_pci_switch_vga(bool big_endian)
2369{
2370    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
2371    SpaprPhbState *sphb;
2372
2373    /*
2374     * For backward compatibility with existing guests, we switch
2375     * the endianness of the VGA controller when changing the guest
2376     * interrupt mode
2377     */
2378    QLIST_FOREACH(sphb, &spapr->phbs, list) {
2379        BusState *bus = &PCI_HOST_BRIDGE(sphb)->bus->qbus;
2380        qbus_walk_children(bus, spapr_switch_one_vga, NULL, NULL, NULL,
2381                           &big_endian);
2382    }
2383}
2384