qemu/hw/ppc/spapr_pci.c
<<
>>
Prefs
   1/*
   2 * QEMU sPAPR PCI host originated from Uninorth PCI host
   3 *
   4 * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
   5 * Copyright (C) 2011 David Gibson, IBM Corporation.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qapi/error.h"
  28#include "cpu.h"
  29#include "hw/hw.h"
  30#include "hw/sysbus.h"
  31#include "hw/pci/pci.h"
  32#include "hw/pci/msi.h"
  33#include "hw/pci/msix.h"
  34#include "hw/pci/pci_host.h"
  35#include "hw/ppc/spapr.h"
  36#include "hw/pci-host/spapr.h"
  37#include "exec/address-spaces.h"
  38#include "exec/ram_addr.h"
  39#include <libfdt.h>
  40#include "trace.h"
  41#include "qemu/error-report.h"
  42#include "qemu/module.h"
  43#include "qapi/qmp/qerror.h"
  44#include "hw/ppc/fdt.h"
  45#include "hw/pci/pci_bridge.h"
  46#include "hw/pci/pci_bus.h"
  47#include "hw/pci/pci_ids.h"
  48#include "hw/ppc/spapr_drc.h"
  49#include "sysemu/device_tree.h"
  50#include "sysemu/kvm.h"
  51#include "sysemu/hostmem.h"
  52#include "sysemu/numa.h"
  53
/*
 * Values of the 'func' argument decoded by rtas_ibm_change_msi().
 * Copied from the kernel arch/powerpc/platforms/pseries/msi.c
 *
 * Only the three RTAS_CHANGE_* codes are handled by the switch in
 * rtas_ibm_change_msi(); any other value is reported as "not implemented".
 */
#define RTAS_QUERY_FN           0
#define RTAS_CHANGE_FN          1
#define RTAS_RESET_FN           2
#define RTAS_CHANGE_MSI_FN      3
#define RTAS_CHANGE_MSIX_FN     4

/*
 * Interrupt types to return on RTAS_CHANGE_*
 * (stored in the fourth return cell when the caller asked for one).
 */
#define RTAS_TYPE_MSI           1
#define RTAS_TYPE_MSIX          2
  64
  65SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid)
  66{
  67    SpaprPhbState *sphb;
  68
  69    QLIST_FOREACH(sphb, &spapr->phbs, list) {
  70        if (sphb->buid != buid) {
  71            continue;
  72        }
  73        return sphb;
  74    }
  75
  76    return NULL;
  77}
  78
  79PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid,
  80                              uint32_t config_addr)
  81{
  82    SpaprPhbState *sphb = spapr_pci_find_phb(spapr, buid);
  83    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
  84    int bus_num = (config_addr >> 16) & 0xFF;
  85    int devfn = (config_addr >> 8) & 0xFF;
  86
  87    if (!phb) {
  88        return NULL;
  89    }
  90
  91    return pci_find_device(phb->bus, bus_num, devfn);
  92}
  93
  94static uint32_t rtas_pci_cfgaddr(uint32_t arg)
  95{
  96    /* This handles the encoding of extended config space addresses */
  97    return ((arg >> 20) & 0xf00) | (arg & 0xff);
  98}
  99
 100static void finish_read_pci_config(SpaprMachineState *spapr, uint64_t buid,
 101                                   uint32_t addr, uint32_t size,
 102                                   target_ulong rets)
 103{
 104    PCIDevice *pci_dev;
 105    uint32_t val;
 106
 107    if ((size != 1) && (size != 2) && (size != 4)) {
 108        /* access must be 1, 2 or 4 bytes */
 109        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 110        return;
 111    }
 112
 113    pci_dev = spapr_pci_find_dev(spapr, buid, addr);
 114    addr = rtas_pci_cfgaddr(addr);
 115
 116    if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
 117        /* Access must be to a valid device, within bounds and
 118         * naturally aligned */
 119        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 120        return;
 121    }
 122
 123    val = pci_host_config_read_common(pci_dev, addr,
 124                                      pci_config_size(pci_dev), size);
 125
 126    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 127    rtas_st(rets, 1, val);
 128}
 129
 130static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
 131                                     uint32_t token, uint32_t nargs,
 132                                     target_ulong args,
 133                                     uint32_t nret, target_ulong rets)
 134{
 135    uint64_t buid;
 136    uint32_t size, addr;
 137
 138    if ((nargs != 4) || (nret != 2)) {
 139        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 140        return;
 141    }
 142
 143    buid = rtas_ldq(args, 1);
 144    size = rtas_ld(args, 3);
 145    addr = rtas_ld(args, 0);
 146
 147    finish_read_pci_config(spapr, buid, addr, size, rets);
 148}
 149
 150static void rtas_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
 151                                 uint32_t token, uint32_t nargs,
 152                                 target_ulong args,
 153                                 uint32_t nret, target_ulong rets)
 154{
 155    uint32_t size, addr;
 156
 157    if ((nargs != 2) || (nret != 2)) {
 158        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 159        return;
 160    }
 161
 162    size = rtas_ld(args, 1);
 163    addr = rtas_ld(args, 0);
 164
 165    finish_read_pci_config(spapr, 0, addr, size, rets);
 166}
 167
 168static void finish_write_pci_config(SpaprMachineState *spapr, uint64_t buid,
 169                                    uint32_t addr, uint32_t size,
 170                                    uint32_t val, target_ulong rets)
 171{
 172    PCIDevice *pci_dev;
 173
 174    if ((size != 1) && (size != 2) && (size != 4)) {
 175        /* access must be 1, 2 or 4 bytes */
 176        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 177        return;
 178    }
 179
 180    pci_dev = spapr_pci_find_dev(spapr, buid, addr);
 181    addr = rtas_pci_cfgaddr(addr);
 182
 183    if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
 184        /* Access must be to a valid device, within bounds and
 185         * naturally aligned */
 186        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 187        return;
 188    }
 189
 190    pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
 191                                 val, size);
 192
 193    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 194}
 195
 196static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
 197                                      uint32_t token, uint32_t nargs,
 198                                      target_ulong args,
 199                                      uint32_t nret, target_ulong rets)
 200{
 201    uint64_t buid;
 202    uint32_t val, size, addr;
 203
 204    if ((nargs != 5) || (nret != 1)) {
 205        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 206        return;
 207    }
 208
 209    buid = rtas_ldq(args, 1);
 210    val = rtas_ld(args, 4);
 211    size = rtas_ld(args, 3);
 212    addr = rtas_ld(args, 0);
 213
 214    finish_write_pci_config(spapr, buid, addr, size, val, rets);
 215}
 216
 217static void rtas_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
 218                                  uint32_t token, uint32_t nargs,
 219                                  target_ulong args,
 220                                  uint32_t nret, target_ulong rets)
 221{
 222    uint32_t val, size, addr;
 223
 224    if ((nargs != 3) || (nret != 1)) {
 225        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 226        return;
 227    }
 228
 229
 230    val = rtas_ld(args, 2);
 231    size = rtas_ld(args, 1);
 232    addr = rtas_ld(args, 0);
 233
 234    finish_write_pci_config(spapr, 0, addr, size, val, rets);
 235}
 236
 237/*
 238 * Set MSI/MSIX message data.
 239 * This is required for msi_notify()/msix_notify() which
 240 * will write at the addresses via spapr_msi_write().
 241 *
 242 * If hwaddr == 0, all entries will have .data == first_irq i.e.
 243 * table will be reset.
 244 */
 245static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
 246                             unsigned first_irq, unsigned req_num)
 247{
 248    unsigned i;
 249    MSIMessage msg = { .address = addr, .data = first_irq };
 250
 251    if (!msix) {
 252        msi_set_message(pdev, msg);
 253        trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
 254        return;
 255    }
 256
 257    for (i = 0; i < req_num; ++i) {
 258        msix_set_message(pdev, i, msg);
 259        trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
 260        if (addr) {
 261            ++msg.data;
 262        }
 263    }
 264}
 265
 266static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
 267                                uint32_t token, uint32_t nargs,
 268                                target_ulong args, uint32_t nret,
 269                                target_ulong rets)
 270{
 271    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 272    uint32_t config_addr = rtas_ld(args, 0);
 273    uint64_t buid = rtas_ldq(args, 1);
 274    unsigned int func = rtas_ld(args, 3);
 275    unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
 276    unsigned int seq_num = rtas_ld(args, 5);
 277    unsigned int ret_intr_type;
 278    unsigned int irq, max_irqs = 0;
 279    SpaprPhbState *phb = NULL;
 280    PCIDevice *pdev = NULL;
 281    spapr_pci_msi *msi;
 282    int *config_addr_key;
 283    Error *err = NULL;
 284    int i;
 285
 286    /* Fins SpaprPhbState */
 287    phb = spapr_pci_find_phb(spapr, buid);
 288    if (phb) {
 289        pdev = spapr_pci_find_dev(spapr, buid, config_addr);
 290    }
 291    if (!phb || !pdev) {
 292        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 293        return;
 294    }
 295
 296    switch (func) {
 297    case RTAS_CHANGE_FN:
 298        if (msi_present(pdev)) {
 299            ret_intr_type = RTAS_TYPE_MSI;
 300        } else if (msix_present(pdev)) {
 301            ret_intr_type = RTAS_TYPE_MSIX;
 302        } else {
 303            rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 304            return;
 305        }
 306        break;
 307    case RTAS_CHANGE_MSI_FN:
 308        if (msi_present(pdev)) {
 309            ret_intr_type = RTAS_TYPE_MSI;
 310        } else {
 311            rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 312            return;
 313        }
 314        break;
 315    case RTAS_CHANGE_MSIX_FN:
 316        if (msix_present(pdev)) {
 317            ret_intr_type = RTAS_TYPE_MSIX;
 318        } else {
 319            rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 320            return;
 321        }
 322        break;
 323    default:
 324        error_report("rtas_ibm_change_msi(%u) is not implemented", func);
 325        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 326        return;
 327    }
 328
 329    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
 330
 331    /* Releasing MSIs */
 332    if (!req_num) {
 333        if (!msi) {
 334            trace_spapr_pci_msi("Releasing wrong config", config_addr);
 335            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 336            return;
 337        }
 338
 339        if (!smc->legacy_irq_allocation) {
 340            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
 341        }
 342        spapr_irq_free(spapr, msi->first_irq, msi->num);
 343        if (msi_present(pdev)) {
 344            spapr_msi_setmsg(pdev, 0, false, 0, 0);
 345        }
 346        if (msix_present(pdev)) {
 347            spapr_msi_setmsg(pdev, 0, true, 0, 0);
 348        }
 349        g_hash_table_remove(phb->msi, &config_addr);
 350
 351        trace_spapr_pci_msi("Released MSIs", config_addr);
 352        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 353        rtas_st(rets, 1, 0);
 354        return;
 355    }
 356
 357    /* Enabling MSI */
 358
 359    /* Check if the device supports as many IRQs as requested */
 360    if (ret_intr_type == RTAS_TYPE_MSI) {
 361        max_irqs = msi_nr_vectors_allocated(pdev);
 362    } else if (ret_intr_type == RTAS_TYPE_MSIX) {
 363        max_irqs = pdev->msix_entries_nr;
 364    }
 365    if (!max_irqs) {
 366        error_report("Requested interrupt type %d is not enabled for device %x",
 367                     ret_intr_type, config_addr);
 368        rtas_st(rets, 0, -1); /* Hardware error */
 369        return;
 370    }
 371    /* Correct the number if the guest asked for too many */
 372    if (req_num > max_irqs) {
 373        trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
 374        req_num = max_irqs;
 375        irq = 0; /* to avoid misleading trace */
 376        goto out;
 377    }
 378
 379    /* Allocate MSIs */
 380    if (smc->legacy_irq_allocation) {
 381        irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
 382                             &err);
 383    } else {
 384        irq = spapr_irq_msi_alloc(spapr, req_num,
 385                                  ret_intr_type == RTAS_TYPE_MSI, &err);
 386    }
 387    if (err) {
 388        error_reportf_err(err, "Can't allocate MSIs for device %x: ",
 389                          config_addr);
 390        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 391        return;
 392    }
 393
 394    for (i = 0; i < req_num; i++) {
 395        spapr_irq_claim(spapr, irq + i, false, &err);
 396        if (err) {
 397            if (i) {
 398                spapr_irq_free(spapr, irq, i);
 399            }
 400            if (!smc->legacy_irq_allocation) {
 401                spapr_irq_msi_free(spapr, irq, req_num);
 402            }
 403            error_reportf_err(err, "Can't allocate MSIs for device %x: ",
 404                              config_addr);
 405            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 406            return;
 407        }
 408    }
 409
 410    /* Release previous MSIs */
 411    if (msi) {
 412        if (!smc->legacy_irq_allocation) {
 413            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
 414        }
 415        spapr_irq_free(spapr, msi->first_irq, msi->num);
 416        g_hash_table_remove(phb->msi, &config_addr);
 417    }
 418
 419    /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
 420    spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
 421                     irq, req_num);
 422
 423    /* Add MSI device to cache */
 424    msi = g_new(spapr_pci_msi, 1);
 425    msi->first_irq = irq;
 426    msi->num = req_num;
 427    config_addr_key = g_new(int, 1);
 428    *config_addr_key = config_addr;
 429    g_hash_table_insert(phb->msi, config_addr_key, msi);
 430
 431out:
 432    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 433    rtas_st(rets, 1, req_num);
 434    rtas_st(rets, 2, ++seq_num);
 435    if (nret > 3) {
 436        rtas_st(rets, 3, ret_intr_type);
 437    }
 438
 439    trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
 440}
 441
 442static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
 443                                                   SpaprMachineState *spapr,
 444                                                   uint32_t token,
 445                                                   uint32_t nargs,
 446                                                   target_ulong args,
 447                                                   uint32_t nret,
 448                                                   target_ulong rets)
 449{
 450    uint32_t config_addr = rtas_ld(args, 0);
 451    uint64_t buid = rtas_ldq(args, 1);
 452    unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
 453    SpaprPhbState *phb = NULL;
 454    PCIDevice *pdev = NULL;
 455    spapr_pci_msi *msi;
 456
 457    /* Find SpaprPhbState */
 458    phb = spapr_pci_find_phb(spapr, buid);
 459    if (phb) {
 460        pdev = spapr_pci_find_dev(spapr, buid, config_addr);
 461    }
 462    if (!phb || !pdev) {
 463        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 464        return;
 465    }
 466
 467    /* Find device descriptor and start IRQ */
 468    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
 469    if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
 470        trace_spapr_pci_msi("Failed to return vector", config_addr);
 471        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 472        return;
 473    }
 474    intr_src_num = msi->first_irq + ioa_intr_num;
 475    trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
 476                                                           intr_src_num);
 477
 478    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 479    rtas_st(rets, 1, intr_src_num);
 480    rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
 481}
 482
 483static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
 484                                    SpaprMachineState *spapr,
 485                                    uint32_t token, uint32_t nargs,
 486                                    target_ulong args, uint32_t nret,
 487                                    target_ulong rets)
 488{
 489    SpaprPhbState *sphb;
 490    uint32_t addr, option;
 491    uint64_t buid;
 492    int ret;
 493
 494    if ((nargs != 4) || (nret != 1)) {
 495        goto param_error_exit;
 496    }
 497
 498    buid = rtas_ldq(args, 1);
 499    addr = rtas_ld(args, 0);
 500    option = rtas_ld(args, 3);
 501
 502    sphb = spapr_pci_find_phb(spapr, buid);
 503    if (!sphb) {
 504        goto param_error_exit;
 505    }
 506
 507    if (!spapr_phb_eeh_available(sphb)) {
 508        goto param_error_exit;
 509    }
 510
 511    ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option);
 512    rtas_st(rets, 0, ret);
 513    return;
 514
 515param_error_exit:
 516    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 517}
 518
 519static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
 520                                           SpaprMachineState *spapr,
 521                                           uint32_t token, uint32_t nargs,
 522                                           target_ulong args, uint32_t nret,
 523                                           target_ulong rets)
 524{
 525    SpaprPhbState *sphb;
 526    PCIDevice *pdev;
 527    uint32_t addr, option;
 528    uint64_t buid;
 529
 530    if ((nargs != 4) || (nret != 2)) {
 531        goto param_error_exit;
 532    }
 533
 534    buid = rtas_ldq(args, 1);
 535    sphb = spapr_pci_find_phb(spapr, buid);
 536    if (!sphb) {
 537        goto param_error_exit;
 538    }
 539
 540    if (!spapr_phb_eeh_available(sphb)) {
 541        goto param_error_exit;
 542    }
 543
 544    /*
 545     * We always have PE address of form "00BB0001". "BB"
 546     * represents the bus number of PE's primary bus.
 547     */
 548    option = rtas_ld(args, 3);
 549    switch (option) {
 550    case RTAS_GET_PE_ADDR:
 551        addr = rtas_ld(args, 0);
 552        pdev = spapr_pci_find_dev(spapr, buid, addr);
 553        if (!pdev) {
 554            goto param_error_exit;
 555        }
 556
 557        rtas_st(rets, 1, (pci_bus_num(pci_get_bus(pdev)) << 16) + 1);
 558        break;
 559    case RTAS_GET_PE_MODE:
 560        rtas_st(rets, 1, RTAS_PE_MODE_SHARED);
 561        break;
 562    default:
 563        goto param_error_exit;
 564    }
 565
 566    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 567    return;
 568
 569param_error_exit:
 570    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 571}
 572
 573static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
 574                                            SpaprMachineState *spapr,
 575                                            uint32_t token, uint32_t nargs,
 576                                            target_ulong args, uint32_t nret,
 577                                            target_ulong rets)
 578{
 579    SpaprPhbState *sphb;
 580    uint64_t buid;
 581    int state, ret;
 582
 583    if ((nargs != 3) || (nret != 4 && nret != 5)) {
 584        goto param_error_exit;
 585    }
 586
 587    buid = rtas_ldq(args, 1);
 588    sphb = spapr_pci_find_phb(spapr, buid);
 589    if (!sphb) {
 590        goto param_error_exit;
 591    }
 592
 593    if (!spapr_phb_eeh_available(sphb)) {
 594        goto param_error_exit;
 595    }
 596
 597    ret = spapr_phb_vfio_eeh_get_state(sphb, &state);
 598    rtas_st(rets, 0, ret);
 599    if (ret != RTAS_OUT_SUCCESS) {
 600        return;
 601    }
 602
 603    rtas_st(rets, 1, state);
 604    rtas_st(rets, 2, RTAS_EEH_SUPPORT);
 605    rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO);
 606    if (nret >= 5) {
 607        rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO);
 608    }
 609    return;
 610
 611param_error_exit:
 612    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 613}
 614
 615static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
 616                                    SpaprMachineState *spapr,
 617                                    uint32_t token, uint32_t nargs,
 618                                    target_ulong args, uint32_t nret,
 619                                    target_ulong rets)
 620{
 621    SpaprPhbState *sphb;
 622    uint32_t option;
 623    uint64_t buid;
 624    int ret;
 625
 626    if ((nargs != 4) || (nret != 1)) {
 627        goto param_error_exit;
 628    }
 629
 630    buid = rtas_ldq(args, 1);
 631    option = rtas_ld(args, 3);
 632    sphb = spapr_pci_find_phb(spapr, buid);
 633    if (!sphb) {
 634        goto param_error_exit;
 635    }
 636
 637    if (!spapr_phb_eeh_available(sphb)) {
 638        goto param_error_exit;
 639    }
 640
 641    ret = spapr_phb_vfio_eeh_reset(sphb, option);
 642    rtas_st(rets, 0, ret);
 643    return;
 644
 645param_error_exit:
 646    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 647}
 648
 649static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
 650                                  SpaprMachineState *spapr,
 651                                  uint32_t token, uint32_t nargs,
 652                                  target_ulong args, uint32_t nret,
 653                                  target_ulong rets)
 654{
 655    SpaprPhbState *sphb;
 656    uint64_t buid;
 657    int ret;
 658
 659    if ((nargs != 3) || (nret != 1)) {
 660        goto param_error_exit;
 661    }
 662
 663    buid = rtas_ldq(args, 1);
 664    sphb = spapr_pci_find_phb(spapr, buid);
 665    if (!sphb) {
 666        goto param_error_exit;
 667    }
 668
 669    if (!spapr_phb_eeh_available(sphb)) {
 670        goto param_error_exit;
 671    }
 672
 673    ret = spapr_phb_vfio_eeh_configure(sphb);
 674    rtas_st(rets, 0, ret);
 675    return;
 676
 677param_error_exit:
 678    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 679}
 680
 681/* To support it later */
 682static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
 683                                       SpaprMachineState *spapr,
 684                                       uint32_t token, uint32_t nargs,
 685                                       target_ulong args, uint32_t nret,
 686                                       target_ulong rets)
 687{
 688    SpaprPhbState *sphb;
 689    int option;
 690    uint64_t buid;
 691
 692    if ((nargs != 8) || (nret != 1)) {
 693        goto param_error_exit;
 694    }
 695
 696    buid = rtas_ldq(args, 1);
 697    sphb = spapr_pci_find_phb(spapr, buid);
 698    if (!sphb) {
 699        goto param_error_exit;
 700    }
 701
 702    if (!spapr_phb_eeh_available(sphb)) {
 703        goto param_error_exit;
 704    }
 705
 706    option = rtas_ld(args, 7);
 707    switch (option) {
 708    case RTAS_SLOT_TEMP_ERR_LOG:
 709    case RTAS_SLOT_PERM_ERR_LOG:
 710        break;
 711    default:
 712        goto param_error_exit;
 713    }
 714
 715    /* We don't have error log yet */
 716    rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
 717    return;
 718
 719param_error_exit:
 720    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 721}
 722
 723static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
 724{
 725    /*
 726     * Here we use the number returned by pci_swizzle_map_irq_fn to find a
 727     * corresponding qemu_irq.
 728     */
 729    SpaprPhbState *phb = opaque;
 730
 731    trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
 732    qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
 733}
 734
 735static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
 736{
 737    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
 738    PCIINTxRoute route;
 739
 740    route.mode = PCI_INTX_ENABLED;
 741    route.irq = sphb->lsi_table[pin].irq;
 742
 743    return route;
 744}
 745
 746/*
 747 * MSI/MSIX memory region implementation.
 748 * The handler handles both MSI and MSIX.
 749 * The vector number is encoded in least bits in data.
 750 */
 751static void spapr_msi_write(void *opaque, hwaddr addr,
 752                            uint64_t data, unsigned size)
 753{
 754    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 755    uint32_t irq = data;
 756
 757    trace_spapr_pci_msi_write(addr, data, irq);
 758
 759    qemu_irq_pulse(spapr_qirq(spapr, irq));
 760}
 761
 762static const MemoryRegionOps spapr_msi_ops = {
 763    /* There is no .read as the read result is undefined by PCI spec */
 764    .read = NULL,
 765    .write = spapr_msi_write,
 766    .endianness = DEVICE_LITTLE_ENDIAN
 767};
 768
 769/*
 770 * PHB PCI device
 771 */
 772static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 773{
 774    SpaprPhbState *phb = opaque;
 775
 776    return &phb->iommu_as;
 777}
 778
 779static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb,  PCIDevice *pdev)
 780{
 781    char *path = NULL, *buf = NULL, *host = NULL;
 782
 783    /* Get the PCI VFIO host id */
 784    host = object_property_get_str(OBJECT(pdev), "host", NULL);
 785    if (!host) {
 786        goto err_out;
 787    }
 788
 789    /* Construct the path of the file that will give us the DT location */
 790    path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host);
 791    g_free(host);
 792    if (!g_file_get_contents(path, &buf, NULL, NULL)) {
 793        goto err_out;
 794    }
 795    g_free(path);
 796
 797    /* Construct and read from host device tree the loc-code */
 798    path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf);
 799    g_free(buf);
 800    if (!g_file_get_contents(path, &buf, NULL, NULL)) {
 801        goto err_out;
 802    }
 803    return buf;
 804
 805err_out:
 806    g_free(path);
 807    return NULL;
 808}
 809
 810static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev)
 811{
 812    char *buf;
 813    const char *devtype = "qemu";
 814    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
 815
 816    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
 817        buf = spapr_phb_vfio_get_loc_code(sphb, pdev);
 818        if (buf) {
 819            return buf;
 820        }
 821        devtype = "vfio";
 822    }
 823    /*
 824     * For emulated devices and VFIO-failure case, make up
 825     * the loc-code.
 826     */
 827    buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x",
 828                          devtype, pdev->name, sphb->index, busnr,
 829                          PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 830    return buf;
 831}
 832
 833/* Macros to operate with address in OF binding to PCI */
 834#define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
 835#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
 836#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
 837#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
 838#define b_ss(x)         b_x((x), 24, 2) /* the space code */
 839#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
 840#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
 841#define b_fff(x)        b_x((x), 8, 3)  /* function number */
 842#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
 843
 844/* for 'reg'/'assigned-addresses' OF properties */
 845#define RESOURCE_CELLS_SIZE 2
 846#define RESOURCE_CELLS_ADDRESS 3
 847
/*
 * One ('phys-addr', 'size') entry of a 'reg' or 'assigned-addresses'
 * property: three address cells followed by two size cells.
 * populate_resource_props() stores every non-zero cell through
 * cpu_to_be32().
 */
typedef struct ResourceFields {
    uint32_t phys_hi;
    uint32_t phys_mid;
    uint32_t phys_lo;
    uint32_t size_hi;
    uint32_t size_lo;
} QEMU_PACKED ResourceFields;

/*
 * Output buffer filled by populate_resource_props().
 */
typedef struct ResourceProps {
    /* config space entry first, then one entry per region with a size */
    ResourceFields reg[8];
    /* one entry per sized region whose address is not PCI_BAR_UNMAPPED */
    ResourceFields assigned[7];
    /* number of bytes used in reg[] */
    uint32_t reg_len;
    /* number of bytes used in assigned[] */
    uint32_t assigned_len;
} ResourceProps;
 862
/* fill in the 'reg'/'assigned-addresses' OF properties for
 864 * a PCI device. 'reg' describes resource requirements for a
 865 * device's IO/MEM regions, 'assigned-addresses' describes the
 866 * actual resource assignments.
 867 *
 868 * the properties are arrays of ('phys-addr', 'size') pairs describing
 869 * the addressable regions of the PCI device, where 'phys-addr' is a
 870 * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to
 871 * (phys.hi, phys.mid, phys.lo), and 'size' is a
 872 * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo).
 873 *
 874 * phys.hi = 0xYYXXXXZZ, where:
 875 *   0xYY = npt000ss
 876 *          |||   |
 877 *          |||   +-- space code
 878 *          |||               |
 879 *          |||               +  00 if configuration space
 880 *          |||               +  01 if IO region,
 881 *          |||               +  10 if 32-bit MEM region
 882 *          |||               +  11 if 64-bit MEM region
 883 *          |||
 884 *          ||+------ for non-relocatable IO: 1 if aliased
 885 *          ||        for relocatable IO: 1 if below 64KB
 886 *          ||        for MEM: 1 if below 1MB
 887 *          |+------- 1 if region is prefetchable
 888 *          +-------- 1 if region is non-relocatable
 889 *   0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function
 890 *            bits respectively
 891 *   0xZZ = rrrrrrrr, the register number of the BAR corresponding
 892 *          to the region
 893 *
 894 * phys.mid and phys.lo correspond respectively to the hi/lo portions
 895 * of the actual address of the region.
 896 *
 897 * how the phys-addr/size values are used differ slightly between
 898 * 'reg' and 'assigned-addresses' properties. namely, 'reg' has
 899 * an additional description for the config space region of the
 900 * device, and in the case of QEMU has n=0 and phys.mid=phys.lo=0
 901 * to describe the region as relocatable, with an address-mapping
 902 * that corresponds directly to the PHB's address space for the
 903 * resource. 'assigned-addresses' always has n=1 set with an absolute
 904 * address assigned for the resource. in general, 'assigned-addresses'
 905 * won't be populated, since addresses for PCI devices are generally
 906 * unmapped initially and left to the guest to assign.
 907 *
 908 * note also that addresses defined in these properties are, at least
 909 * for PAPR guests, relative to the PHBs IO/MEM windows, and
 910 * correspond directly to the addresses in the BARs.
 911 *
 912 * in accordance with PCI Bus Binding to Open Firmware,
 913 * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7,
 914 * Appendix C.
 915 */
 916static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
 917{
 918    int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d))));
 919    uint32_t dev_id = (b_bbbbbbbb(bus_num) |
 920                       b_ddddd(PCI_SLOT(d->devfn)) |
 921                       b_fff(PCI_FUNC(d->devfn)));
 922    ResourceFields *reg, *assigned;
 923    int i, reg_idx = 0, assigned_idx = 0;
 924
 925    /* config space region */
 926    reg = &rp->reg[reg_idx++];
 927    reg->phys_hi = cpu_to_be32(dev_id);
 928    reg->phys_mid = 0;
 929    reg->phys_lo = 0;
 930    reg->size_hi = 0;
 931    reg->size_lo = 0;
 932
 933    for (i = 0; i < PCI_NUM_REGIONS; i++) {
 934        if (!d->io_regions[i].size) {
 935            continue;
 936        }
 937
 938        reg = &rp->reg[reg_idx++];
 939
 940        reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i)));
 941        if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
 942            reg->phys_hi |= cpu_to_be32(b_ss(1));
 943        } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
 944            reg->phys_hi |= cpu_to_be32(b_ss(3));
 945        } else {
 946            reg->phys_hi |= cpu_to_be32(b_ss(2));
 947        }
 948        reg->phys_mid = 0;
 949        reg->phys_lo = 0;
 950        reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32);
 951        reg->size_lo = cpu_to_be32(d->io_regions[i].size);
 952
 953        if (d->io_regions[i].addr == PCI_BAR_UNMAPPED) {
 954            continue;
 955        }
 956
 957        assigned = &rp->assigned[assigned_idx++];
 958        assigned->phys_hi = cpu_to_be32(be32_to_cpu(reg->phys_hi) | b_n(1));
 959        assigned->phys_mid = cpu_to_be32(d->io_regions[i].addr >> 32);
 960        assigned->phys_lo = cpu_to_be32(d->io_regions[i].addr);
 961        assigned->size_hi = reg->size_hi;
 962        assigned->size_lo = reg->size_lo;
 963    }
 964
 965    rp->reg_len = reg_idx * sizeof(ResourceFields);
 966    rp->assigned_len = assigned_idx * sizeof(ResourceFields);
 967}
 968
 969typedef struct PCIClass PCIClass;
 970typedef struct PCISubClass PCISubClass;
 971typedef struct PCIIFace PCIIFace;
 972
 973struct PCIIFace {
 974    int iface;
 975    const char *name;
 976};
 977
 978struct PCISubClass {
 979    int subclass;
 980    const char *name;
 981    const PCIIFace *iface;
 982};
 983
 984struct PCIClass {
 985    const char *name;
 986    const PCISubClass *subc;
 987};
 988
 989static const PCISubClass undef_subclass[] = {
 990    { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
 991    { 0xFF, NULL, NULL },
 992};
 993
 994static const PCISubClass mass_subclass[] = {
 995    { PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
 996    { PCI_CLASS_STORAGE_IDE, "ide", NULL },
 997    { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
 998    { PCI_CLASS_STORAGE_IPI, "ipi", NULL },
 999    { PCI_CLASS_STORAGE_RAID, "raid", NULL },
1000    { PCI_CLASS_STORAGE_ATA, "ata", NULL },
1001    { PCI_CLASS_STORAGE_SATA, "sata", NULL },
1002    { PCI_CLASS_STORAGE_SAS, "sas", NULL },
1003    { 0xFF, NULL, NULL },
1004};
1005
1006static const PCISubClass net_subclass[] = {
1007    { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
1008    { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
1009    { PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
1010    { PCI_CLASS_NETWORK_ATM, "atm", NULL },
1011    { PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
1012    { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
1013    { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
1014    { 0xFF, NULL, NULL },
1015};
1016
1017static const PCISubClass displ_subclass[] = {
1018    { PCI_CLASS_DISPLAY_VGA, "vga", NULL },
1019    { PCI_CLASS_DISPLAY_XGA, "xga", NULL },
1020    { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
1021    { 0xFF, NULL, NULL },
1022};
1023
1024static const PCISubClass media_subclass[] = {
1025    { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
1026    { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
1027    { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
1028    { 0xFF, NULL, NULL },
1029};
1030
1031static const PCISubClass mem_subclass[] = {
1032    { PCI_CLASS_MEMORY_RAM, "memory", NULL },
1033    { PCI_CLASS_MEMORY_FLASH, "flash", NULL },
1034    { 0xFF, NULL, NULL },
1035};
1036
1037static const PCISubClass bridg_subclass[] = {
1038    { PCI_CLASS_BRIDGE_HOST, "host", NULL },
1039    { PCI_CLASS_BRIDGE_ISA, "isa", NULL },
1040    { PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
1041    { PCI_CLASS_BRIDGE_MC, "mca", NULL },
1042    { PCI_CLASS_BRIDGE_PCI, "pci", NULL },
1043    { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
1044    { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
1045    { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
1046    { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
1047    { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
1048    { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
1049    { 0xFF, NULL, NULL },
1050};
1051
1052static const PCISubClass comm_subclass[] = {
1053    { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
1054    { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
1055    { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
1056    { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
1057    { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
1058    { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
1059    { 0xFF, NULL, NULL, },
1060};
1061
1062static const PCIIFace pic_iface[] = {
1063    { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
1064    { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
1065    { 0xFF, NULL },
1066};
1067
1068static const PCISubClass sys_subclass[] = {
1069    { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
1070    { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
1071    { PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
1072    { PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
1073    { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
1074    { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
1075    { 0xFF, NULL, NULL },
1076};
1077
1078static const PCISubClass inp_subclass[] = {
1079    { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
1080    { PCI_CLASS_INPUT_PEN, "pen", NULL },
1081    { PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
1082    { PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
1083    { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
1084    { 0xFF, NULL, NULL },
1085};
1086
1087static const PCISubClass dock_subclass[] = {
1088    { PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
1089    { 0xFF, NULL, NULL },
1090};
1091
1092static const PCISubClass cpu_subclass[] = {
1093    { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
1094    { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
1095    { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
1096    { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
1097    { 0xFF, NULL, NULL },
1098};
1099
1100static const PCIIFace usb_iface[] = {
1101    { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
1102    { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
1103    { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
1104    { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
1105    { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
1106    { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
1107    { 0xFF, NULL },
1108};
1109
1110static const PCISubClass ser_subclass[] = {
1111    { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
1112    { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
1113    { PCI_CLASS_SERIAL_SSA, "ssa", NULL },
1114    { PCI_CLASS_SERIAL_USB, "usb", usb_iface },
1115    { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
1116    { PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
1117    { PCI_CLASS_SERIAL_IB, "infiniband", NULL },
1118    { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
1119    { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
1120    { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
1121    { 0xFF, NULL, NULL },
1122};
1123
1124static const PCISubClass wrl_subclass[] = {
1125    { PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
1126    { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
1127    { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
1128    { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
1129    { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
1130    { 0xFF, NULL, NULL },
1131};
1132
1133static const PCISubClass sat_subclass[] = {
1134    { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
1135    { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
1136    { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
1137    { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
1138    { 0xFF, NULL, NULL },
1139};
1140
1141static const PCISubClass crypt_subclass[] = {
1142    { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
1143    { PCI_CLASS_CRYPT_ENTERTAINMENT,
1144      "entertainment-encryption", NULL },
1145    { 0xFF, NULL, NULL },
1146};
1147
1148static const PCISubClass spc_subclass[] = {
1149    { PCI_CLASS_SP_DPIO, "dpio", NULL },
1150    { PCI_CLASS_SP_PERF, "counter", NULL },
1151    { PCI_CLASS_SP_SYNCH, "measurement", NULL },
1152    { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
1153    { 0xFF, NULL, NULL },
1154};
1155
1156static const PCIClass pci_classes[] = {
1157    { "legacy-device", undef_subclass },
1158    { "mass-storage",  mass_subclass },
1159    { "network", net_subclass },
1160    { "display", displ_subclass, },
1161    { "multimedia-device", media_subclass },
1162    { "memory-controller", mem_subclass },
1163    { "unknown-bridge", bridg_subclass },
1164    { "communication-controller", comm_subclass},
1165    { "system-peripheral", sys_subclass },
1166    { "input-controller", inp_subclass },
1167    { "docking-station", dock_subclass },
1168    { "cpu", cpu_subclass },
1169    { "serial-bus", ser_subclass },
1170    { "wireless-controller", wrl_subclass },
1171    { "intelligent-io", NULL },
1172    { "satellite-device", sat_subclass },
1173    { "encryption", crypt_subclass },
1174    { "data-processing-controller", spc_subclass },
1175};
1176
1177static const char *dt_name_from_class(uint8_t class, uint8_t subclass,
1178                                      uint8_t iface)
1179{
1180    const PCIClass *pclass;
1181    const PCISubClass *psubclass;
1182    const PCIIFace *piface;
1183    const char *name;
1184
1185    if (class >= ARRAY_SIZE(pci_classes)) {
1186        return "pci";
1187    }
1188
1189    pclass = pci_classes + class;
1190    name = pclass->name;
1191
1192    if (pclass->subc == NULL) {
1193        return name;
1194    }
1195
1196    psubclass = pclass->subc;
1197    while ((psubclass->subclass & 0xff) != 0xff) {
1198        if ((psubclass->subclass & 0xff) == subclass) {
1199            name = psubclass->name;
1200            break;
1201        }
1202        psubclass++;
1203    }
1204
1205    piface = psubclass->iface;
1206    if (piface == NULL) {
1207        return name;
1208    }
1209    while ((piface->iface & 0xff) != 0xff) {
1210        if ((piface->iface & 0xff) == iface) {
1211            name = piface->name;
1212            break;
1213        }
1214        piface++;
1215    }
1216
1217    return name;
1218}
1219
1220/*
1221 * DRC helper functions
1222 */
1223
1224static uint32_t drc_id_from_devfn(SpaprPhbState *phb,
1225                                  uint8_t chassis, int32_t devfn)
1226{
1227    return (phb->index << 16) | (chassis << 8) | devfn;
1228}
1229
1230static SpaprDrc *drc_from_devfn(SpaprPhbState *phb,
1231                                uint8_t chassis, int32_t devfn)
1232{
1233    return spapr_drc_by_id(TYPE_SPAPR_DRC_PCI,
1234                           drc_id_from_devfn(phb, chassis, devfn));
1235}
1236
1237static uint8_t chassis_from_bus(PCIBus *bus, Error **errp)
1238{
1239    if (pci_bus_is_root(bus)) {
1240        return 0;
1241    } else {
1242        PCIDevice *bridge = pci_bridge_get_device(bus);
1243
1244        return object_property_get_uint(OBJECT(bridge), "chassis_nr", errp);
1245    }
1246}
1247
1248static SpaprDrc *drc_from_dev(SpaprPhbState *phb, PCIDevice *dev)
1249{
1250    Error *local_err = NULL;
1251    uint8_t chassis = chassis_from_bus(pci_get_bus(dev), &local_err);
1252
1253    if (local_err) {
1254        error_report_err(local_err);
1255        return NULL;
1256    }
1257
1258    return drc_from_devfn(phb, chassis, dev->devfn);
1259}
1260
1261static void add_drcs(SpaprPhbState *phb, PCIBus *bus, Error **errp)
1262{
1263    Object *owner;
1264    int i;
1265    uint8_t chassis;
1266    Error *local_err = NULL;
1267
1268    if (!phb->dr_enabled) {
1269        return;
1270    }
1271
1272    chassis = chassis_from_bus(bus, &local_err);
1273    if (local_err) {
1274        error_propagate(errp, local_err);
1275        return;
1276    }
1277
1278    if (pci_bus_is_root(bus)) {
1279        owner = OBJECT(phb);
1280    } else {
1281        owner = OBJECT(pci_bridge_get_device(bus));
1282    }
1283
1284    for (i = 0; i < PCI_SLOT_MAX * PCI_FUNC_MAX; i++) {
1285        spapr_dr_connector_new(owner, TYPE_SPAPR_DRC_PCI,
1286                               drc_id_from_devfn(phb, chassis, i));
1287    }
1288}
1289
1290static void remove_drcs(SpaprPhbState *phb, PCIBus *bus, Error **errp)
1291{
1292    int i;
1293    uint8_t chassis;
1294    Error *local_err = NULL;
1295
1296    if (!phb->dr_enabled) {
1297        return;
1298    }
1299
1300    chassis = chassis_from_bus(bus, &local_err);
1301    if (local_err) {
1302        error_propagate(errp, local_err);
1303        return;
1304    }
1305
1306    for (i = PCI_SLOT_MAX * PCI_FUNC_MAX - 1; i >= 0; i--) {
1307        SpaprDrc *drc = drc_from_devfn(phb, chassis, i);
1308
1309        if (drc) {
1310            object_unparent(OBJECT(drc));
1311        }
1312    }
1313}
1314
1315typedef struct PciWalkFdt {
1316    void *fdt;
1317    int offset;
1318    SpaprPhbState *sphb;
1319    int err;
1320} PciWalkFdt;
1321
1322static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
1323                               void *fdt, int parent_offset);
1324
1325static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev,
1326                                   void *opaque)
1327{
1328    PciWalkFdt *p = opaque;
1329    int err;
1330
1331    if (p->err) {
1332        /* Something's already broken, don't keep going */
1333        return;
1334    }
1335
1336    err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset);
1337    if (err < 0) {
1338        p->err = err;
1339    }
1340}
1341
1342/* Augment PCI device node with bridge specific information */
1343static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus,
1344                               void *fdt, int offset)
1345{
1346    Object *owner;
1347    PciWalkFdt cbinfo = {
1348        .fdt = fdt,
1349        .offset = offset,
1350        .sphb = sphb,
1351        .err = 0,
1352    };
1353    int ret;
1354
1355    _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
1356                          RESOURCE_CELLS_ADDRESS));
1357    _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
1358                          RESOURCE_CELLS_SIZE));
1359
1360    assert(bus);
1361    pci_for_each_device_reverse(bus, pci_bus_num(bus),
1362                                spapr_dt_pci_device_cb, &cbinfo);
1363    if (cbinfo.err) {
1364        return cbinfo.err;
1365    }
1366
1367    if (pci_bus_is_root(bus)) {
1368        owner = OBJECT(sphb);
1369    } else {
1370        owner = OBJECT(pci_bridge_get_device(bus));
1371    }
1372
1373    ret = spapr_dt_drc(fdt, offset, owner,
1374                       SPAPR_DR_CONNECTOR_TYPE_PCI);
1375    if (ret) {
1376        return ret;
1377    }
1378
1379    return offset;
1380}
1381
/*
 * Create the OF node for PCI device @dev below @parent_offset in @fdt and
 * fill in the required OF DT properties.
 *
 * The node is named "<name>@<slot>" (or "<name>@<slot>,<func>" for a
 * non-zero function), where <name> comes from dt_name_from_class() applied
 * to the device's class code.  For a bridge, the node is additionally
 * populated for its secondary bus through spapr_dt_pci_bus().
 *
 * Returns the offset of the new node for a non-bridge device; for a bridge,
 * whatever spapr_dt_pci_bus() returns (the node offset, or an error value).
 *
 * NOTE(review): _FDT() is defined outside this chunk; it presumably does
 * not return on a libfdt failure -- confirm against hw/ppc/fdt.h.
 */
static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
                               void *fdt, int parent_offset)
{
    int offset;
    const gchar *basename;
    gchar *nodename;
    int slot = PCI_SLOT(dev->devfn);
    int func = PCI_FUNC(dev->devfn);
    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
    ResourceProps rp;
    SpaprDrc *drc = drc_from_dev(sphb, dev);
    /* snapshot of the config space fields that become properties below */
    uint32_t vendor_id = pci_default_read_config(dev, PCI_VENDOR_ID, 2);
    uint32_t device_id = pci_default_read_config(dev, PCI_DEVICE_ID, 2);
    uint32_t revision_id = pci_default_read_config(dev, PCI_REVISION_ID, 1);
    /* 3 bytes starting at the prog-if register: class, subclass, prog-if */
    uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
    uint32_t irq_pin = pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1);
    uint32_t subsystem_id = pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2);
    uint32_t subsystem_vendor_id =
        pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2);
    uint32_t cache_line_size =
        pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1);
    uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2);
    gchar *loc_code;

    /* class in bits 23..16, subclass in bits 15..8, prog-if in bits 7..0 */
    basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff,
                                  ccode & 0xff);

    /* the function number is only part of the unit address when non-zero */
    if (func != 0) {
        nodename = g_strdup_printf("%s@%x,%x", basename, slot, func);
    } else {
        nodename = g_strdup_printf("%s@%x", basename, slot);
    }

    _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename));

    g_free(nodename);

    /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */
    _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id));
    _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id));
    _FDT(fdt_setprop_cell(fdt, offset, "revision-id", revision_id));

    _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
    /* the next three properties are only emitted for non-zero values */
    if (irq_pin) {
        _FDT(fdt_setprop_cell(fdt, offset, "interrupts", irq_pin));
    }

    if (subsystem_id) {
        _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", subsystem_id));
    }

    if (subsystem_vendor_id) {
        _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id",
                              subsystem_vendor_id));
    }

    _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", cache_line_size));


    /* the following fdt cells are masked off the pci status register */
    _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed",
                          PCI_STATUS_DEVSEL_MASK & pci_status));

    /* capability flags are empty (zero-length) properties */
    if (pci_status & PCI_STATUS_FAST_BACK) {
        _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0));
    }
    if (pci_status & PCI_STATUS_66MHZ) {
        _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0));
    }
    if (pci_status & PCI_STATUS_UDF) {
        _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
    }

    /* loc_code is freed here once it has been copied into the tree */
    loc_code = spapr_phb_get_loc_code(sphb, dev);
    _FDT(fdt_setprop_string(fdt, offset, "ibm,loc-code", loc_code));
    g_free(loc_code);

    /* drc is NULL when drc_from_dev() found no connector for the device */
    if (drc) {
        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index",
                              spapr_drc_index(drc)));
    }

    /* MSI / MSI-X vector counts, emitted only when non-zero */
    if (msi_present(dev)) {
        uint32_t max_msi = msi_nr_vectors_allocated(dev);
        if (max_msi) {
            _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi));
        }
    }
    if (msix_present(dev)) {
        uint32_t max_msix = dev->msix_entries_nr;
        if (max_msix) {
            _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix));
        }
    }

    /* rp holds both payloads together with their lengths in bytes */
    populate_resource_props(dev, &rp);
    _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len));
    _FDT(fdt_setprop(fdt, offset, "assigned-addresses",
                     (uint8_t *)rp.assigned, rp.assigned_len));

    if (sphb->pcie_ecs && pci_is_express(dev)) {
        _FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1));
    }

    spapr_phb_nvgpu_populate_pcidev_dt(dev, fdt, offset, sphb);

    if (!pc->is_bridge) {
        /* Properties only for non-bridges */
        uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1);
        uint32_t max_latency = pci_default_read_config(dev, PCI_MAX_LAT, 1);
        _FDT(fdt_setprop_cell(fdt, offset, "min-grant", min_grant));
        _FDT(fdt_setprop_cell(fdt, offset, "max-latency", max_latency));
        return offset;
    } else {
        /* a bridge node also describes its secondary bus */
        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));

        return spapr_dt_pci_bus(sphb, sec_bus, fdt, offset);
    }
}
1502
1503/* Callback to be called during DRC release. */
1504void spapr_phb_remove_pci_device_cb(DeviceState *dev)
1505{
1506    HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
1507
1508    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
1509    object_unparent(OBJECT(dev));
1510}
1511
1512int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
1513                          void *fdt, int *fdt_start_offset, Error **errp)
1514{
1515    HotplugHandler *plug_handler = qdev_get_hotplug_handler(drc->dev);
1516    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(plug_handler);
1517    PCIDevice *pdev = PCI_DEVICE(drc->dev);
1518
1519    *fdt_start_offset = spapr_dt_pci_device(sphb, pdev, fdt, 0);
1520    return 0;
1521}
1522
1523static void spapr_pci_bridge_plug(SpaprPhbState *phb,
1524                                  PCIBridge *bridge,
1525                                  Error **errp)
1526{
1527    Error *local_err = NULL;
1528    PCIBus *bus = pci_bridge_get_sec_bus(bridge);
1529
1530    add_drcs(phb, bus, &local_err);
1531    if (local_err) {
1532        error_propagate(errp, local_err);
1533        return;
1534    }
1535}
1536
1537static void spapr_pci_plug(HotplugHandler *plug_handler,
1538                           DeviceState *plugged_dev, Error **errp)
1539{
1540    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1541    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1542    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
1543    SpaprDrc *drc = drc_from_dev(phb, pdev);
1544    Error *local_err = NULL;
1545    PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
1546    uint32_t slotnr = PCI_SLOT(pdev->devfn);
1547
1548    /* if DR is disabled we don't need to do anything in the case of
1549     * hotplug or coldplug callbacks
1550     */
1551    if (!phb->dr_enabled) {
1552        /* if this is a hotplug operation initiated by the user
1553         * we need to let them know it's not enabled
1554         */
1555        if (plugged_dev->hotplugged) {
1556            error_setg(&local_err, QERR_BUS_NO_HOTPLUG,
1557                       object_get_typename(OBJECT(phb)));
1558        }
1559        goto out;
1560    }
1561
1562    g_assert(drc);
1563
1564    if (pc->is_bridge) {
1565        spapr_pci_bridge_plug(phb, PCI_BRIDGE(plugged_dev), &local_err);
1566        if (local_err) {
1567            error_propagate(errp, local_err);
1568            return;
1569        }
1570    }
1571
1572    /* Following the QEMU convention used for PCIe multifunction
1573     * hotplug, we do not allow functions to be hotplugged to a
1574     * slot that already has function 0 present
1575     */
1576    if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
1577        PCI_FUNC(pdev->devfn) != 0) {
1578        error_setg(&local_err, "PCI: slot %d function 0 already ocuppied by %s,"
1579                   " additional functions can no longer be exposed to guest.",
1580                   slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name);
1581        goto out;
1582    }
1583
1584    spapr_drc_attach(drc, DEVICE(pdev), &local_err);
1585    if (local_err) {
1586        goto out;
1587    }
1588
1589    /* If this is function 0, signal hotplug for all the device functions.
1590     * Otherwise defer sending the hotplug event.
1591     */
1592    if (!spapr_drc_hotplugged(plugged_dev)) {
1593        spapr_drc_reset(drc);
1594    } else if (PCI_FUNC(pdev->devfn) == 0) {
1595        int i;
1596        uint8_t chassis = chassis_from_bus(pci_get_bus(pdev), &local_err);
1597
1598        if (local_err) {
1599            error_propagate(errp, local_err);
1600            return;
1601        }
1602
1603        for (i = 0; i < 8; i++) {
1604            SpaprDrc *func_drc;
1605            SpaprDrcClass *func_drck;
1606            SpaprDREntitySense state;
1607
1608            func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
1609            func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1610            state = func_drck->dr_entity_sense(func_drc);
1611
1612            if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
1613                spapr_hotplug_req_add_by_index(func_drc);
1614            }
1615        }
1616    }
1617
1618out:
1619    error_propagate(errp, local_err);
1620}
1621
1622static void spapr_pci_bridge_unplug(SpaprPhbState *phb,
1623                                    PCIBridge *bridge,
1624                                    Error **errp)
1625{
1626    Error *local_err = NULL;
1627    PCIBus *bus = pci_bridge_get_sec_bus(bridge);
1628
1629    remove_drcs(phb, bus, &local_err);
1630    if (local_err) {
1631        error_propagate(errp, local_err);
1632        return;
1633    }
1634}
1635
1636static void spapr_pci_unplug(HotplugHandler *plug_handler,
1637                             DeviceState *plugged_dev, Error **errp)
1638{
1639    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
1640    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1641
1642    /* some version guests do not wait for completion of a device
1643     * cleanup (generally done asynchronously by the kernel) before
1644     * signaling to QEMU that the device is safe, but instead sleep
1645     * for some 'safe' period of time. unfortunately on a busy host
1646     * this sleep isn't guaranteed to be long enough, resulting in
1647     * bad things like IRQ lines being left asserted during final
1648     * device removal. to deal with this we call reset just prior
1649     * to finalizing the device, which will put the device back into
1650     * an 'idle' state, as the device cleanup code expects.
1651     */
1652    pci_device_reset(PCI_DEVICE(plugged_dev));
1653
1654    if (pc->is_bridge) {
1655        Error *local_err = NULL;
1656        spapr_pci_bridge_unplug(phb, PCI_BRIDGE(plugged_dev), &local_err);
1657        if (local_err) {
1658            error_propagate(errp, local_err);
1659        }
1660        return;
1661    }
1662
1663    object_property_set_bool(OBJECT(plugged_dev), false, "realized", NULL);
1664}
1665
1666static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
1667                                     DeviceState *plugged_dev, Error **errp)
1668{
1669    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
1670    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
1671    SpaprDrc *drc = drc_from_dev(phb, pdev);
1672
1673    if (!phb->dr_enabled) {
1674        error_setg(errp, QERR_BUS_NO_HOTPLUG,
1675                   object_get_typename(OBJECT(phb)));
1676        return;
1677    }
1678
1679    g_assert(drc);
1680    g_assert(drc->dev == plugged_dev);
1681
1682    if (!spapr_drc_unplug_requested(drc)) {
1683        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
1684        uint32_t slotnr = PCI_SLOT(pdev->devfn);
1685        SpaprDrc *func_drc;
1686        SpaprDrcClass *func_drck;
1687        SpaprDREntitySense state;
1688        int i;
1689        Error *local_err = NULL;
1690        uint8_t chassis = chassis_from_bus(pci_get_bus(pdev), &local_err);
1691
1692        if (local_err) {
1693            error_propagate(errp, local_err);
1694            return;
1695        }
1696
1697        if (pc->is_bridge) {
1698            error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
1699        }
1700
1701        /* ensure any other present functions are pending unplug */
1702        if (PCI_FUNC(pdev->devfn) == 0) {
1703            for (i = 1; i < 8; i++) {
1704                func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
1705                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1706                state = func_drck->dr_entity_sense(func_drc);
1707                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
1708                    && !spapr_drc_unplug_requested(func_drc)) {
1709                    error_setg(errp,
1710                               "PCI: slot %d, function %d still present. "
1711                               "Must unplug all non-0 functions first.",
1712                               slotnr, i);
1713                    return;
1714                }
1715            }
1716        }
1717
1718        spapr_drc_detach(drc);
1719
1720        /* if this isn't func 0, defer unplug event. otherwise signal removal
1721         * for all present functions
1722         */
1723        if (PCI_FUNC(pdev->devfn) == 0) {
1724            for (i = 7; i >= 0; i--) {
1725                func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
1726                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
1727                state = func_drck->dr_entity_sense(func_drc);
1728                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
1729                    spapr_hotplug_req_remove_by_index(func_drc);
1730                }
1731            }
1732        }
1733    }
1734}
1735
1736static void spapr_phb_finalizefn(Object *obj)
1737{
1738    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(obj);
1739
1740    g_free(sphb->dtbusname);
1741    sphb->dtbusname = NULL;
1742}
1743
/*
 * Unrealize handler for the sPAPR PCI host bridge.
 *
 * Releases, in this order: the NVGPU state, the MSI hash table, the TCE
 * table subregions of the IOMMU root, the root bus DRCs, the LSI
 * interrupts, the PHB's list linkage, the MSI window, the IOMMU address
 * space, the root bus registration and finally the IO/MMIO windows mapped
 * into system memory.
 *
 * NOTE(review): when remove_drcs() fails, the function returns right away
 * and none of the later teardown steps run -- confirm this is intended.
 */
static void spapr_phb_unrealize(DeviceState *dev, Error **errp)
{
    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
    SysBusDevice *s = SYS_BUS_DEVICE(dev);
    PCIHostState *phb = PCI_HOST_BRIDGE(s);
    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(phb);
    SpaprTceTable *tcet;
    int i;
    const unsigned windows_supported = spapr_phb_windows_supported(sphb);
    Error *local_err = NULL;

    spapr_phb_nvgpu_free(sphb);

    /* drop the reference on the MSI table, if one exists */
    if (sphb->msi) {
        g_hash_table_unref(sphb->msi);
        sphb->msi = NULL;
    }

    /*
     * Remove IO/MMIO subregions and aliases, rest should get cleaned
     * via PHB's unrealize->object_finalize
     */
    /* walk the DMA windows from last to first; skip LIOBNs with no table */
    for (i = windows_supported - 1; i >= 0; i--) {
        tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
        if (tcet) {
            memory_region_del_subregion(&sphb->iommu_root,
                                        spapr_tce_get_iommu(tcet));
        }
    }

    /* drop the DRCs of the root bus; a failure aborts the teardown here */
    remove_drcs(sphb, phb->bus, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /* free every LSI that has an interrupt number recorded */
    for (i = PCI_NUM_PINS - 1; i >= 0; i--) {
        if (sphb->lsi_table[i].irq) {
            spapr_irq_free(spapr, sphb->lsi_table[i].irq, 1);
            sphb->lsi_table[i].irq = 0;
        }
    }

    /* unlink this PHB from the list it is chained on */
    QLIST_REMOVE(sphb, list);

    memory_region_del_subregion(&sphb->iommu_root, &sphb->msiwindow);

    /*
     * An attached PCI device may have memory listeners, eg. VFIO PCI. We have
     * unmapped all sections. Remove the listeners now, before destroying the
     * address space.
     */
    address_space_remove_listeners(&sphb->iommu_as);
    address_space_destroy(&sphb->iommu_as);

    /* detach the hotplug handler before unregistering the root bus */
    qbus_set_hotplug_handler(BUS(phb->bus), NULL, &error_abort);
    pci_unregister_root_bus(phb->bus);

    memory_region_del_subregion(get_system_memory(), &sphb->iowindow);
    /* the 64-bit window is only removed when its PCI address is not -1 */
    if (sphb->mem64_win_pciaddr != (hwaddr)-1) {
        memory_region_del_subregion(get_system_memory(), &sphb->mem64window);
    }
    memory_region_del_subregion(get_system_memory(), &sphb->mem32window);
}
1808
1809static void spapr_phb_realize(DeviceState *dev, Error **errp)
1810{
1811    /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user
1812     * tries to add a sPAPR PHB to a non-pseries machine.
1813     */
1814    SpaprMachineState *spapr =
1815        (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(),
1816                                                  TYPE_SPAPR_MACHINE);
1817    SpaprMachineClass *smc = spapr ? SPAPR_MACHINE_GET_CLASS(spapr) : NULL;
1818    SysBusDevice *s = SYS_BUS_DEVICE(dev);
1819    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
1820    PCIHostState *phb = PCI_HOST_BRIDGE(s);
1821    char *namebuf;
1822    int i;
1823    PCIBus *bus;
1824    uint64_t msi_window_size = 4096;
1825    SpaprTceTable *tcet;
1826    const unsigned windows_supported = spapr_phb_windows_supported(sphb);
1827    Error *local_err = NULL;
1828
1829    if (!spapr) {
1830        error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine");
1831        return;
1832    }
1833
1834    assert(sphb->index != (uint32_t)-1); /* checked in spapr_phb_pre_plug() */
1835
1836    if (sphb->mem64_win_size != 0) {
1837        if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
1838            error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx
1839                       " (max 2 GiB)", sphb->mem_win_size);
1840            return;
1841        }
1842
1843        /* 64-bit window defaults to identity mapping */
1844        sphb->mem64_win_pciaddr = sphb->mem64_win_addr;
1845    } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
1846        /*
1847         * For compatibility with old configuration, if no 64-bit MMIO
1848         * window is specified, but the ordinary (32-bit) memory
1849         * window is specified as > 2GiB, we treat it as a 2GiB 32-bit
1850         * window, with a 64-bit MMIO window following on immediately
1851         * afterwards
1852         */
1853        sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE;
1854        sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE;
1855        sphb->mem64_win_pciaddr =
1856            SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE;
1857        sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE;
1858    }
1859
1860    if (spapr_pci_find_phb(spapr, sphb->buid)) {
1861        SpaprPhbState *s;
1862
1863        error_setg(errp, "PCI host bridges must have unique indexes");
1864        error_append_hint(errp, "The following indexes are already in use:");
1865        QLIST_FOREACH(s, &spapr->phbs, list) {
1866            error_append_hint(errp, " %d", s->index);
1867        }
1868        error_append_hint(errp, "\nTry another value for the index property\n");
1869        return;
1870    }
1871
1872    if (sphb->numa_node != -1 &&
1873        (sphb->numa_node >= MAX_NODES || !numa_info[sphb->numa_node].present)) {
1874        error_setg(errp, "Invalid NUMA node ID for PCI host bridge");
1875        return;
1876    }
1877
1878    sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
1879
1880    /* Initialize memory regions */
1881    namebuf = g_strdup_printf("%s.mmio", sphb->dtbusname);
1882    memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
1883    g_free(namebuf);
1884
1885    namebuf = g_strdup_printf("%s.mmio32-alias", sphb->dtbusname);
1886    memory_region_init_alias(&sphb->mem32window, OBJECT(sphb),
1887                             namebuf, &sphb->memspace,
1888                             SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
1889    g_free(namebuf);
1890    memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
1891                                &sphb->mem32window);
1892
1893    if (sphb->mem64_win_size != 0) {
1894        namebuf = g_strdup_printf("%s.mmio64-alias", sphb->dtbusname);
1895        memory_region_init_alias(&sphb->mem64window, OBJECT(sphb),
1896                                 namebuf, &sphb->memspace,
1897                                 sphb->mem64_win_pciaddr, sphb->mem64_win_size);
1898        g_free(namebuf);
1899
1900        memory_region_add_subregion(get_system_memory(),
1901                                    sphb->mem64_win_addr,
1902                                    &sphb->mem64window);
1903    }
1904
1905    /* Initialize IO regions */
1906    namebuf = g_strdup_printf("%s.io", sphb->dtbusname);
1907    memory_region_init(&sphb->iospace, OBJECT(sphb),
1908                       namebuf, SPAPR_PCI_IO_WIN_SIZE);
1909    g_free(namebuf);
1910
1911    namebuf = g_strdup_printf("%s.io-alias", sphb->dtbusname);
1912    memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
1913                             &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
1914    g_free(namebuf);
1915    memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
1916                                &sphb->iowindow);
1917
1918    bus = pci_register_root_bus(dev, NULL,
1919                                pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb,
1920                                &sphb->memspace, &sphb->iospace,
1921                                PCI_DEVFN(0, 0), PCI_NUM_PINS,
1922                                TYPE_PCI_BUS);
1923
1924    /*
1925     * Despite resembling a vanilla PCI bus in most ways, the PAPR
1926     * para-virtualized PCI bus *does* permit PCI-E extended config
1927     * space access
1928     */
1929    if (sphb->pcie_ecs) {
1930        bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
1931    }
1932    phb->bus = bus;
1933    qbus_set_hotplug_handler(BUS(phb->bus), OBJECT(sphb), NULL);
1934
1935    /*
1936     * Initialize PHB address space.
1937     * By default there will be at least one subregion for default
1938     * 32bit DMA window.
1939     * Later the guest might want to create another DMA window
1940     * which will become another memory subregion.
1941     */
1942    namebuf = g_strdup_printf("%s.iommu-root", sphb->dtbusname);
1943    memory_region_init(&sphb->iommu_root, OBJECT(sphb),
1944                       namebuf, UINT64_MAX);
1945    g_free(namebuf);
1946    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
1947                       sphb->dtbusname);
1948
1949    /*
1950     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
1951     * we need to allocate some memory to catch those writes coming
1952     * from msi_notify()/msix_notify().
1953     * As MSIMessage:addr is going to be the same and MSIMessage:data
1954     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
1955     * be used.
1956     *
1957     * For KVM we want to ensure that this memory is a full page so that
1958     * our memory slot is of page size granularity.
1959     */
1960    if (kvm_enabled()) {
1961        msi_window_size = getpagesize();
1962    }
1963
1964    memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), &spapr_msi_ops, spapr,
1965                          "msi", msi_window_size);
1966    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
1967                                &sphb->msiwindow);
1968
1969    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
1970
1971    pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
1972
1973    QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
1974
1975    /* Initialize the LSI table */
1976    for (i = 0; i < PCI_NUM_PINS; i++) {
1977        uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
1978
1979        if (smc->legacy_irq_allocation) {
1980            irq = spapr_irq_findone(spapr, &local_err);
1981            if (local_err) {
1982                error_propagate_prepend(errp, local_err,
1983                                        "can't allocate LSIs: ");
1984                /*
1985                 * Older machines will never support PHB hotplug, ie, this is an
1986                 * init only path and QEMU will terminate. No need to rollback.
1987                 */
1988                return;
1989            }
1990        }
1991
1992        spapr_irq_claim(spapr, irq, true, &local_err);
1993        if (local_err) {
1994            error_propagate_prepend(errp, local_err, "can't allocate LSIs: ");
1995            goto unrealize;
1996        }
1997
1998        sphb->lsi_table[i].irq = irq;
1999    }
2000
2001    /* allocate connectors for child PCI devices */
2002    add_drcs(sphb, phb->bus, &local_err);
2003    if (local_err) {
2004        error_propagate(errp, local_err);
2005        goto unrealize;
2006    }
2007
2008    /* DMA setup */
2009    for (i = 0; i < windows_supported; ++i) {
2010        tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]);
2011        if (!tcet) {
2012            error_setg(errp, "Creating window#%d failed for %s",
2013                       i, sphb->dtbusname);
2014            goto unrealize;
2015        }
2016        memory_region_add_subregion(&sphb->iommu_root, 0,
2017                                    spapr_tce_get_iommu(tcet));
2018    }
2019
2020    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
2021    return;
2022
2023unrealize:
2024    spapr_phb_unrealize(dev, NULL);
2025}
2026
2027static int spapr_phb_children_reset(Object *child, void *opaque)
2028{
2029    DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
2030
2031    if (dev) {
2032        device_reset(dev);
2033    }
2034
2035    return 0;
2036}
2037
2038void spapr_phb_dma_reset(SpaprPhbState *sphb)
2039{
2040    int i;
2041    SpaprTceTable *tcet;
2042
2043    for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) {
2044        tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
2045
2046        if (tcet && tcet->nb_table) {
2047            spapr_tce_table_disable(tcet);
2048        }
2049    }
2050
2051    /* Register default 32bit DMA window */
2052    tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]);
2053    spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
2054                           sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT);
2055}
2056
2057static void spapr_phb_reset(DeviceState *qdev)
2058{
2059    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
2060    Error *errp = NULL;
2061
2062    spapr_phb_dma_reset(sphb);
2063    spapr_phb_nvgpu_free(sphb);
2064    spapr_phb_nvgpu_setup(sphb, &errp);
2065    if (errp) {
2066        error_report_err(errp);
2067    }
2068
2069    /* Reset the IOMMU state */
2070    object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
2071
2072    if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) {
2073        spapr_phb_vfio_reset(qdev);
2074    }
2075}
2076
/*
 * qdev properties of the sPAPR PCI host bridge, with their default values.
 * The table is installed as dc->props by spapr_phb_class_init().
 */
static Property spapr_phb_properties[] = {
    /* -1 means "not set"; spapr_phb_realize() asserts a real index is set */
    DEFINE_PROP_UINT32("index", SpaprPhbState, index, -1),
    /*
     * Sizes of the 32-bit MMIO, 64-bit MMIO and IO windows. A mem_win_size
     * above SPAPR_PCI_MEM32_WIN_SIZE is only accepted by
     * spapr_phb_realize() when mem64_win_size is 0.
     */
    DEFINE_PROP_UINT64("mem_win_size", SpaprPhbState, mem_win_size,
                       SPAPR_PCI_MEM32_WIN_SIZE),
    DEFINE_PROP_UINT64("mem64_win_size", SpaprPhbState, mem64_win_size,
                       SPAPR_PCI_MEM64_WIN_SIZE),
    DEFINE_PROP_UINT64("io_win_size", SpaprPhbState, io_win_size,
                       SPAPR_PCI_IO_WIN_SIZE),
    DEFINE_PROP_BOOL("dynamic-reconfiguration", SpaprPhbState, dr_enabled,
                     true),
    /* Default DMA window is 0..1GB */
    DEFINE_PROP_UINT64("dma_win_addr", SpaprPhbState, dma_win_addr, 0),
    DEFINE_PROP_UINT64("dma_win_size", SpaprPhbState, dma_win_size, 0x40000000),
    DEFINE_PROP_UINT64("dma64_win_addr", SpaprPhbState, dma64_win_addr,
                       0x800000000000000ULL),
    /* When set, spapr_dt_phb() advertises the dynamic DMA window RTAS calls */
    DEFINE_PROP_BOOL("ddw", SpaprPhbState, ddw_enabled, true),
    /* Bit n set = page size 2^n; the default has bits 12 and 16 set */
    DEFINE_PROP_UINT64("pgsz", SpaprPhbState, page_size_mask,
                       (1ULL << 12) | (1ULL << 16)),
    /* -1 means no NUMA node; other values are validated at realize time */
    DEFINE_PROP_UINT32("numa_node", SpaprPhbState, numa_node, -1),
    /* Selects the extra fields of vmstate_spapr_pci (see pre_2_8_migration) */
    DEFINE_PROP_BOOL("pre-2.8-migration", SpaprPhbState,
                     pre_2_8_migration, false),
    /* When set, realize flags the bus PCI_BUS_EXTENDED_CONFIG_SPACE */
    DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState,
                     pcie_ecs, true),
    DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0),
    DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0),
    DEFINE_PROP_END_OF_LIST(),
};
2104
/*
 * Migration description of one struct spapr_pci_lsi, i.e. one entry of
 * SpaprPhbState::lsi_table (referenced from vmstate_spapr_pci).
 */
static const VMStateDescription vmstate_spapr_pci_lsi = {
    .name = "spapr_pci/lsi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        /*
         * NOTE(review): the _EQUAL variant presumably checks the incoming
         * value against the local one instead of loading it - confirm
         * against the VMSTATE_UINT32_EQUAL definition.
         */
        VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi, NULL),

        VMSTATE_END_OF_LIST()
    },
};
2115
/*
 * Migration description of one spapr_pci_msi_mig, i.e. one entry of
 * SpaprPhbState::msi_devs: the key of an entry of the PHB's msi hash table
 * together with its value (first_irq and num).
 */
static const VMStateDescription vmstate_spapr_pci_msi = {
    .name = "spapr_pci/msi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(key, spapr_pci_msi_mig),
        VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
        VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
        VMSTATE_END_OF_LIST()
    },
};
2127
2128static int spapr_pci_pre_save(void *opaque)
2129{
2130    SpaprPhbState *sphb = opaque;
2131    GHashTableIter iter;
2132    gpointer key, value;
2133    int i;
2134
2135    if (sphb->pre_2_8_migration) {
2136        sphb->mig_liobn = sphb->dma_liobn[0];
2137        sphb->mig_mem_win_addr = sphb->mem_win_addr;
2138        sphb->mig_mem_win_size = sphb->mem_win_size;
2139        sphb->mig_io_win_addr = sphb->io_win_addr;
2140        sphb->mig_io_win_size = sphb->io_win_size;
2141
2142        if ((sphb->mem64_win_size != 0)
2143            && (sphb->mem64_win_addr
2144                == (sphb->mem_win_addr + sphb->mem_win_size))) {
2145            sphb->mig_mem_win_size += sphb->mem64_win_size;
2146        }
2147    }
2148
2149    g_free(sphb->msi_devs);
2150    sphb->msi_devs = NULL;
2151    sphb->msi_devs_num = g_hash_table_size(sphb->msi);
2152    if (!sphb->msi_devs_num) {
2153        return 0;
2154    }
2155    sphb->msi_devs = g_new(spapr_pci_msi_mig, sphb->msi_devs_num);
2156
2157    g_hash_table_iter_init(&iter, sphb->msi);
2158    for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
2159        sphb->msi_devs[i].key = *(uint32_t *) key;
2160        sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
2161    }
2162
2163    return 0;
2164}
2165
2166static int spapr_pci_post_load(void *opaque, int version_id)
2167{
2168    SpaprPhbState *sphb = opaque;
2169    gpointer key, value;
2170    int i;
2171
2172    for (i = 0; i < sphb->msi_devs_num; ++i) {
2173        key = g_memdup(&sphb->msi_devs[i].key,
2174                       sizeof(sphb->msi_devs[i].key));
2175        value = g_memdup(&sphb->msi_devs[i].value,
2176                         sizeof(sphb->msi_devs[i].value));
2177        g_hash_table_insert(sphb->msi, key, value);
2178    }
2179    g_free(sphb->msi_devs);
2180    sphb->msi_devs = NULL;
2181    sphb->msi_devs_num = 0;
2182
2183    return 0;
2184}
2185
2186static bool pre_2_8_migration(void *opaque, int version_id)
2187{
2188    SpaprPhbState *sphb = opaque;
2189
2190    return sphb->pre_2_8_migration;
2191}
2192
/*
 * Migration description of the PHB (installed as dc->vmsd by
 * spapr_phb_class_init()).
 *
 * The mig_* fields are only part of the stream when pre_2_8_migration()
 * returns true; spapr_pci_pre_save() fills them. The msi hash table is not
 * described directly: spapr_pci_pre_save() flattens it into msi_devs and
 * spapr_pci_post_load() rebuilds it from there.
 */
static const VMStateDescription vmstate_spapr_pci = {
    .name = "spapr_pci",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_save = spapr_pci_pre_save,
    .post_load = spapr_pci_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL),
        VMSTATE_UINT32_TEST(mig_liobn, SpaprPhbState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_mem_win_addr, SpaprPhbState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_mem_win_size, SpaprPhbState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_io_win_addr, SpaprPhbState, pre_2_8_migration),
        VMSTATE_UINT64_TEST(mig_io_win_size, SpaprPhbState, pre_2_8_migration),
        /* One vmstate_spapr_pci_lsi entry per PCI interrupt pin */
        VMSTATE_STRUCT_ARRAY(lsi_table, SpaprPhbState, PCI_NUM_PINS, 0,
                             vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
        /* Element count first, then the msi_devs array itself */
        VMSTATE_INT32(msi_devs_num, SpaprPhbState),
        VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, SpaprPhbState, msi_devs_num, 0,
                                    vmstate_spapr_pci_msi, spapr_pci_msi_mig),
        VMSTATE_END_OF_LIST()
    },
};
2214
2215static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
2216                                           PCIBus *rootbus)
2217{
2218    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
2219
2220    return sphb->dtbusname;
2221}
2222
2223static void spapr_phb_class_init(ObjectClass *klass, void *data)
2224{
2225    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
2226    DeviceClass *dc = DEVICE_CLASS(klass);
2227    HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);
2228
2229    hc->root_bus_path = spapr_phb_root_bus_path;
2230    dc->realize = spapr_phb_realize;
2231    dc->unrealize = spapr_phb_unrealize;
2232    dc->props = spapr_phb_properties;
2233    dc->reset = spapr_phb_reset;
2234    dc->vmsd = &vmstate_spapr_pci;
2235    /* Supported by TYPE_SPAPR_MACHINE */
2236    dc->user_creatable = true;
2237    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
2238    hp->plug = spapr_pci_plug;
2239    hp->unplug = spapr_pci_unplug;
2240    hp->unplug_request = spapr_pci_unplug_request;
2241}
2242
/*
 * QOM type description of the sPAPR PCI host bridge: a PCI host bridge
 * subclass whose instances are SpaprPhbState and which implements the
 * hotplug handler interface. Registered by spapr_pci_register_types().
 */
static const TypeInfo spapr_phb_info = {
    .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
    .parent        = TYPE_PCI_HOST_BRIDGE,
    .instance_size = sizeof(SpaprPhbState),
    .instance_finalize = spapr_phb_finalizefn,
    .class_init    = spapr_phb_class_init,
    .interfaces    = (InterfaceInfo[]) {
        { TYPE_HOTPLUG_HANDLER },
        { }
    }
};
2254
2255static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
2256                                           void *opaque)
2257{
2258    unsigned int *bus_no = opaque;
2259    PCIBus *sec_bus = NULL;
2260
2261    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
2262         PCI_HEADER_TYPE_BRIDGE)) {
2263        return;
2264    }
2265
2266    (*bus_no)++;
2267    pci_default_write_config(pdev, PCI_PRIMARY_BUS, pci_dev_bus_num(pdev), 1);
2268    pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1);
2269    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
2270
2271    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
2272    if (!sec_bus) {
2273        return;
2274    }
2275
2276    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
2277                        spapr_phb_pci_enumerate_bridge, bus_no);
2278    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
2279}
2280
2281static void spapr_phb_pci_enumerate(SpaprPhbState *phb)
2282{
2283    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
2284    unsigned int bus_no = 0;
2285
2286    pci_for_each_device(bus, pci_bus_num(bus),
2287                        spapr_phb_pci_enumerate_bridge,
2288                        &bus_no);
2289
2290}
2291
/*
 * Add the device tree node of a PHB, and of the buses below it, to @fdt.
 *
 * @phb:          the host bridge to describe
 * @intc_phandle: phandle of the interrupt controller, used as the parent
 *                in every "interrupt-map" entry
 * @fdt:          the flattened device tree being built
 * @nr_msis:      value of the "ibm,pe-total-#msi" property
 * @node_offset:  if not NULL, receives the offset of the new PHB node
 *
 * Returns 0 on success, -1 if the PHB has no TCE table for its first LIOBN,
 * or the negative value returned by spapr_dt_pci_bus(). An error from
 * spapr_phb_nvgpu_populate_dt() is reported but does not fail the call.
 *
 * NOTE(review): libfdt calls are wrapped in _FDT(), which presumably does
 * not return on failure - confirm against its definition.
 */
int spapr_dt_phb(SpaprPhbState *phb, uint32_t intc_phandle, void *fdt,
                 uint32_t nr_msis, int *node_offset)
{
    int bus_off, i, j, ret;
    uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
    /* One "ranges" entry: PCI space code, PCI address, CPU address, size */
    struct {
        uint32_t hi;
        uint64_t child;
        uint64_t parent;
        uint64_t size;
    } QEMU_PACKED ranges[] = {
        {
            /* IO window */
            cpu_to_be32(b_ss(1)), cpu_to_be64(0),
            cpu_to_be64(phb->io_win_addr),
            cpu_to_be64(memory_region_size(&phb->iospace)),
        },
        {
            /* 32-bit MMIO window */
            cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
            cpu_to_be64(phb->mem_win_addr),
            cpu_to_be64(phb->mem_win_size),
        },
        {
            /* 64-bit MMIO window, only emitted when its size is not 0 */
            cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr),
            cpu_to_be64(phb->mem64_win_addr),
            cpu_to_be64(phb->mem64_win_size),
        },
    };
    /* Drop the third entry when there is no 64-bit MMIO window */
    const unsigned sizeof_ranges =
        (phb->mem64_win_size ? 3 : 2) * sizeof(ranges[0]);
    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
    uint32_t interrupt_map_mask[] = {
        cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
    /* One 7-cell entry per (slot, pin) pair */
    uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
    uint32_t ddw_applicable[] = {
        cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW),
        cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW),
        cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW)
    };
    uint32_t ddw_extensions[] = {
        cpu_to_be32(1),
        cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
    };
    uint32_t associativity[] = {cpu_to_be32(0x4),
                                cpu_to_be32(0x0),
                                cpu_to_be32(0x0),
                                cpu_to_be32(0x0),
                                cpu_to_be32(phb->numa_node)};
    SpaprTceTable *tcet;
    SpaprDrc *drc;
    Error *errp = NULL;

    /* Start populating the FDT */
    _FDT(bus_off = fdt_add_subnode(fdt, 0, phb->dtbusname));
    if (node_offset) {
        *node_offset = bus_off;
    }

    /* Write PHB properties */
    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", nr_msis));

    /* Dynamic DMA window */
    if (phb->ddw_enabled) {
        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable,
                         sizeof(ddw_applicable)));
        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
                         &ddw_extensions, sizeof(ddw_extensions)));
    }

    /* Advertise NUMA via ibm,associativity */
    if (phb->numa_node != -1) {
        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
                         sizeof(associativity)));
    }

    /* Build the interrupt-map, this must match what is done
     * in pci_swizzle_map_irq_fn
     */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
    for (i = 0; i < PCI_SLOT_MAX; i++) {
        for (j = 0; j < PCI_NUM_PINS; j++) {
            uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
            int lsi_num = pci_swizzle(i, j);

            /* Cells 0-2: device address, cell 3: pin (1-based) */
            irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
            irqmap[1] = 0;
            irqmap[2] = 0;
            irqmap[3] = cpu_to_be32(j+1);
            /* Cell 4: interrupt parent, cells 5-6: filled by spapr_dt_irq() */
            irqmap[4] = cpu_to_be32(intc_phandle);
            spapr_dt_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true);
        }
    }
    /* Write interrupt map */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                     sizeof(interrupt_map)));

    /* Describe the first DMA window in "ibm,dma-window" */
    tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
    if (!tcet) {
        return -1;
    }
    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
                 tcet->liobn, tcet->bus_offset,
                 tcet->nb_table << tcet->page_shift);

    /* The DRC index is only advertised when the PHB has a DRC */
    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, phb->index);
    if (drc) {
        uint32_t drc_index = cpu_to_be32(spapr_drc_index(drc));

        _FDT(fdt_setprop(fdt, bus_off, "ibm,my-drc-index", &drc_index,
                         sizeof(drc_index)));
    }

    /* Walk the bridges and program the bus numbers */
    spapr_phb_pci_enumerate(phb);
    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));

    /* Walk the bridge and subordinate buses */
    ret = spapr_dt_pci_bus(phb, PCI_HOST_BRIDGE(phb)->bus, fdt, bus_off);
    if (ret < 0) {
        return ret;
    }

    /* NVGPU errors are reported but do not fail the device tree build */
    spapr_phb_nvgpu_populate_dt(phb, fdt, bus_off, &errp);
    if (errp) {
        error_report_err(errp);
    }
    spapr_phb_nvgpu_ram_populate_dt(phb, fdt);

    return 0;
}
2430
2431void spapr_pci_rtas_init(void)
2432{
2433    spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
2434                        rtas_read_pci_config);
2435    spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
2436                        rtas_write_pci_config);
2437    spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
2438                        rtas_ibm_read_pci_config);
2439    spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
2440                        rtas_ibm_write_pci_config);
2441    if (msi_nonbroken) {
2442        spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
2443                            "ibm,query-interrupt-source-number",
2444                            rtas_ibm_query_interrupt_source_number);
2445        spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
2446                            rtas_ibm_change_msi);
2447    }
2448
2449    spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION,
2450                        "ibm,set-eeh-option",
2451                        rtas_ibm_set_eeh_option);
2452    spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2,
2453                        "ibm,get-config-addr-info2",
2454                        rtas_ibm_get_config_addr_info2);
2455    spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2,
2456                        "ibm,read-slot-reset-state2",
2457                        rtas_ibm_read_slot_reset_state2);
2458    spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET,
2459                        "ibm,set-slot-reset",
2460                        rtas_ibm_set_slot_reset);
2461    spapr_rtas_register(RTAS_IBM_CONFIGURE_PE,
2462                        "ibm,configure-pe",
2463                        rtas_ibm_configure_pe);
2464    spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL,
2465                        "ibm,slot-error-detail",
2466                        rtas_ibm_slot_error_detail);
2467}
2468
/* Register the sPAPR PCI host bridge type described by spapr_phb_info. */
static void spapr_pci_register_types(void)
{
    type_register_static(&spapr_phb_info);
}

/* Hook the registration function into QEMU's type initialization */
type_init(spapr_pci_register_types)
2475
2476static int spapr_switch_one_vga(DeviceState *dev, void *opaque)
2477{
2478    bool be = *(bool *)opaque;
2479
2480    if (object_dynamic_cast(OBJECT(dev), "VGA")
2481        || object_dynamic_cast(OBJECT(dev), "secondary-vga")) {
2482        object_property_set_bool(OBJECT(dev), be, "big-endian-framebuffer",
2483                                 &error_abort);
2484    }
2485    return 0;
2486}
2487
2488void spapr_pci_switch_vga(bool big_endian)
2489{
2490    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
2491    SpaprPhbState *sphb;
2492
2493    /*
2494     * For backward compatibility with existing guests, we switch
2495     * the endianness of the VGA controller when changing the guest
2496     * interrupt mode
2497     */
2498    QLIST_FOREACH(sphb, &spapr->phbs, list) {
2499        BusState *bus = &PCI_HOST_BRIDGE(sphb)->bus->qbus;
2500        qbus_walk_children(bus, spapr_switch_one_vga, NULL, NULL, NULL,
2501                           &big_endian);
2502    }
2503}
2504