linux/arch/powerpc/platforms/powernv/pci-ioda.c
/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
#include <asm/debug.h>

#include "powernv.h"
#include "pci.h"

#define define_pe_printk_level(func, kern_level)                \
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)     \
{                                                               \
        struct va_format vaf;                                   \
        va_list args;                                           \
        char pfix[32];                                          \
        int r;                                                  \
                                                                \
        va_start(args, fmt);                                    \
                                                                \
        vaf.fmt = fmt;                                          \
        vaf.va = &args;                                         \
                                                                \
        if (pe->pdev)                                           \
                strlcpy(pfix, dev_name(&pe->pdev->dev),         \
                        sizeof(pfix));                          \
        else                                                    \
                sprintf(pfix, "%04x:%02x     ",                 \
                        pci_domain_nr(pe->pbus),                \
                        pe->pbus->number);                      \
        r = printk(kern_level "pci %s: [PE# %.3d] %pV",         \
                   pfix, pe->pe_number, &vaf);                  \
                                                                \
        va_end(args);                                           \
                                                                \
        return r;                                               \
}                                                               \

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);

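/*
 * PE# allocation is a simple lock-free scan of the PE bitmap:
 * find_next_zero_bit() picks a candidate and test_and_set_bit()
 * claims it atomically; on a race with a concurrent allocator we
 * simply rescan. Illustrative example: with bits 0..2 already set,
 * the next call hands out PE# 3.
 */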
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
        unsigned long pe;

        do {
                pe = find_next_zero_bit(phb->ioda.pe_alloc,
                                        phb->ioda.total_pe, 0);
                if (pe >= phb->ioda.total_pe)
                        return IODA_INVALID_PE;
        } while(test_and_set_bit(pe, phb->ioda.pe_alloc));

        phb->ioda.pe_array[pe].phb = phb;
        phb->ioda.pe_array[pe].pe_number = pe;
        return pe;
}

static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
        WARN_ON(phb->ioda.pe_array[pe].pdev);

        memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
        clear_bit(pe, phb->ioda.pe_alloc);
}

/* Currently those two are only used when MSIs are enabled. This will
 * change, but in the meantime we need to protect them to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pci_get_pdn(dev);

        if (!pdn)
                return NULL;
        if (pdn->pe_number == IODA_INVALID_PE)
                return NULL;
        return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */

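/*
 * Rough sketch of the RID matching set up below: OPAL compares an
 * incoming RID (bus << 8 | devfn) against pe->rid under the selected
 * bus/device/function comparison modes. For a bus-type PE spanning
 * "count" buses, only the top bits of the bus number are compared,
 * e.g. count = 8 uses OpalPciBus5Bits (5 compared bits, 2^3 buses
 * covered), and rid_end = pe->rid + (count << 8) since each bus
 * spans 256 RIDs.
 */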
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
        struct pci_dev *parent;
        uint8_t bcomp, dcomp, fcomp;
        long rc, rid_end, rid;

        /* Bus validation ? */
        if (pe->pbus) {
                int count;

                dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
                fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
                parent = pe->pbus->self;
                if (pe->flags & PNV_IODA_PE_BUS_ALL)
                        count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
                else
                        count = 1;

                switch(count) {
                case  1: bcomp = OpalPciBusAll;         break;
                case  2: bcomp = OpalPciBus7Bits;       break;
                case  4: bcomp = OpalPciBus6Bits;       break;
                case  8: bcomp = OpalPciBus5Bits;       break;
                case 16: bcomp = OpalPciBus4Bits;       break;
                case 32: bcomp = OpalPciBus3Bits;       break;
                default:
                        pr_err("%s: Number of subordinate buses %d"
                               " unsupported\n",
                               pci_name(pe->pbus->self), count);
                        /* Do an exact match only */
                        bcomp = OpalPciBusAll;
                }
                rid_end = pe->rid + (count << 8);
        } else {
                parent = pe->pdev->bus->self;
                bcomp = OpalPciBusAll;
                dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
                fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
                rid_end = pe->rid + 1;
        }

        /* Associate PE in PELT */
        rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
                             bcomp, dcomp, fcomp, OPAL_MAP_PE);
        if (rc) {
                pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
                return -ENXIO;
        }
        opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
                                  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

        /* Add to all parents' PELT-V */
        while (parent) {
                struct pci_dn *pdn = pci_get_pdn(parent);
                if (pdn && pdn->pe_number != IODA_INVALID_PE) {
                        rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
                                                pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
                        /* XXX What to do in case of error ? */
                }
                parent = parent->bus->self;
        }
        /* Setup reverse map */
        for (rid = pe->rid; rid < rid_end; rid++)
                phb->ioda.pe_rmap[rid] = pe->pe_number;

        /* Setup one MVT on IODA1 */
        if (phb->type == PNV_PHB_IODA1) {
                pe->mve_number = pe->pe_number;
                rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
                                      pe->pe_number);
                if (rc) {
                        pe_err(pe, "OPAL error %ld setting up MVE %d\n",
                               rc, pe->mve_number);
                        pe->mve_number = -1;
                } else {
                        rc = opal_pci_set_mve_enable(phb->opal_id,
                                                     pe->mve_number, OPAL_ENABLE_MVE);
                        if (rc) {
                                pe_err(pe, "OPAL error %ld enabling MVE %d\n",
                                       rc, pe->mve_number);
                                pe->mve_number = -1;
                        }
                }
        } else if (phb->type == PNV_PHB_IODA2)
                pe->mve_number = 0;

        return 0;
}

static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
                                       struct pnv_ioda_pe *pe)
{
        struct pnv_ioda_pe *lpe;

        list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
                if (lpe->dma_weight < pe->dma_weight) {
                        list_add_tail(&pe->dma_link, &lpe->dma_link);
                        return;
                }
        }
        list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}

static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
        /* This is quite simplistic. The "base" weight of a device
         * is 10; a weight of 0 means no DMA is to be accounted for it.
         */

        /* If it's a bridge, no DMA */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
                return 0;

        /* Reduce the weight of slow USB controllers */
        if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
            dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
            dev->class == PCI_CLASS_SERIAL_USB_EHCI)
                return 3;

        /* Increase the weight of RAID (includes Obsidian) */
        if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
                return 15;

        /* Default */
        return 10;
}

#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pci_get_pdn(dev);
        struct pnv_ioda_pe *pe;
        int pe_num;

        if (!pdn) {
                pr_err("%s: Device tree node not associated properly\n",
                           pci_name(dev));
                return NULL;
        }
        if (pdn->pe_number != IODA_INVALID_PE)
                return NULL;

        /* PE#0 has been pre-set */
        if (dev->bus->number == 0)
                pe_num = 0;
        else
                pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available, disabling device\n",
                           pci_name(dev));
                return NULL;
        }

        /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
         * pointer in the PE data structure; both should be destroyed at the
         * same time. However, this needs to be looked at more closely again
         * once we actually start removing things (Hotplug, SR-IOV, ...)
         *
         * At some point we want to remove the PDN completely anyway
         */
        pe = &phb->ioda.pe_array[pe_num];
        pci_dev_get(dev);
        pdn->pcidev = dev;
        pdn->pe_number = pe_num;
        pe->pdev = dev;
        pe->pbus = NULL;
        pe->tce32_seg = -1;
        pe->mve_number = -1;
        pe->rid = dev->bus->number << 8 | pdn->devfn;

        pe_info(pe, "Associated device to PE\n");

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                if (pe_num)
                        pnv_ioda_free_pe(phb, pe_num);
                pdn->pe_number = IODA_INVALID_PE;
                pe->pdev = NULL;
                pci_dev_put(dev);
                return NULL;
        }

        /* Assign a DMA weight to the device */
        pe->dma_weight = pnv_ioda_dma_weight(dev);
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);

        return pe;
}
#endif /* Useful for SRIOV case */

static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
        struct pci_dev *dev;

        list_for_each_entry(dev, &bus->devices, bus_list) {
                struct pci_dn *pdn = pci_get_pdn(dev);

                if (pdn == NULL) {
                        pr_warn("%s: No device node associated with device !\n",
                                pci_name(dev));
                        continue;
                }
                pci_dev_get(dev);
                pdn->pcidev = dev;
                pdn->pe_number = pe->pe_number;
                pe->dma_weight += pnv_ioda_dma_weight(dev);
                if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
                        pnv_ioda_setup_same_PE(dev->subordinate, pe);
        }
}

/*
 * There are two types of PCI-bus-sensitive PEs: one comprises a single
 * PCI bus, the other contains a primary PCI bus plus its subordinate
 * PCI devices and buses. The second type is normally originated by a
 * PCIe-to-PCI bridge or a PLX switch downstream port.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        struct pnv_ioda_pe *pe;
        int pe_num;

        pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
                        __func__, pci_domain_nr(bus), bus->number);
                return;
        }

        pe = &phb->ioda.pe_array[pe_num];
        pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
        pe->pbus = bus;
        pe->pdev = NULL;
        pe->tce32_seg = -1;
        pe->mve_number = -1;
        pe->rid = bus->busn_res.start << 8;
        pe->dma_weight = 0;

        if (all)
                pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
                        bus->busn_res.start, bus->busn_res.end, pe_num);
        else
                pe_info(pe, "Secondary bus %d associated with PE#%d\n",
                        bus->busn_res.start, pe_num);

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                if (pe_num)
                        pnv_ioda_free_pe(phb, pe_num);
                pe->pbus = NULL;
                return;
        }

        /* Associate it with all child devices */
        pnv_ioda_setup_same_PE(bus, pe);

        /* Put PE on the list */
        list_add_tail(&pe->list, &phb->ioda.pe_list);

        /* Account for one DMA PE if at least one DMA-capable device exists
         * below the bridge
         */
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);
}

static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
        struct pci_dev *dev;

        pnv_ioda_setup_bus_PE(bus, 0);

        list_for_each_entry(dev, &bus->devices, bus_list) {
                if (dev->subordinate) {
                        if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
                                pnv_ioda_setup_bus_PE(dev->subordinate, 1);
                        else
                                pnv_ioda_setup_PEs(dev->subordinate);
                }
        }
}

/*
 * Configure PEs so that the downstream PCI buses and devices
 * get their associated PE#. Unfortunately, we haven't yet
 * figured out a way to identify PLX bridges, so for now we
 * simply put the PCI bus and everything subordinate to the
 * root port into a PE here. This scheme is expected to change
 * as soon as we can detect PLX bridges correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
        struct pci_controller *hose, *tmp;

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                pnv_ioda_setup_PEs(hose->bus);
        }
}

static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
        struct pci_dn *pdn = pci_get_pdn(pdev);
        struct pnv_ioda_pe *pe;

        /*
         * The function can be called while the PE#
         * hasn't been assigned yet. Do nothing in
         * that case.
         */
        if (!pdn || pdn->pe_number == IODA_INVALID_PE)
                return;

        pe = &phb->ioda.pe_array[pdn->pe_number];
        set_iommu_table_base(&pdev->dev, &pe->tce32_table);
}

static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
        struct pci_dev *dev;

        list_for_each_entry(dev, &bus->devices, bus_list) {
                set_iommu_table_base(&dev->dev, &pe->tce32_table);
                if (dev->subordinate)
                        pnv_ioda_setup_bus_dma(pe, dev->subordinate);
        }
}

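/*
 * Software-invalidated TCEs: tbl->it_index holds the ioremap'ed
 * address of the TCE kill register (set up from the
 * "ibm,opal-tce-kill" property in the DMA setup paths further
 * down), and each __raw_writeq() below invalidates a block of
 * TCEs starting at the encoded address.
 */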
static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
                                         u64 *startp, u64 *endp)
{
        u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
        unsigned long start, end, inc;

        start = __pa(startp);
        end = __pa(endp);

        /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
        if (tbl->it_busno) {
                start <<= 12;
                end <<= 12;
                inc = 128 << 12;
                start |= tbl->it_busno;
                end |= tbl->it_busno;
        } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
                /* p7ioc-style invalidation, 2 TCEs per write */
                start |= (1ull << 63);
                end |= (1ull << 63);
                inc = 16;
        } else {
                /* Default (older HW) */
                inc = 128;
        }

        end |= inc - 1; /* round up end to be different than start */

        mb(); /* Ensure above stores are visible */
        while (start <= end) {
                __raw_writeq(start, invalidate);
                start += inc;
        }

        /*
         * The iommu layer will do another mb() for us on build()
         * and we don't care on free()
         */
}

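/*
 * Layout of the value written to the kill register, as constructed
 * below (IODA2): the top nibble (bits 63:60) holds 0x2 to select
 * PE-scoped invalidation, bits 7:0 the PE number, and bits 12 and up
 * the TCE index derived from it_offset plus the entry's position in
 * the table.
 */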
static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
                                         struct iommu_table *tbl,
                                         u64 *startp, u64 *endp)
{
        unsigned long start, end, inc;
        u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;

        /* We'll invalidate DMA addresses in PE scope */
        start = 0x2ul << 60;
        start |= (pe->pe_number & 0xFF);
        end = start;

        /* Figure out the start, end and step */
        inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
        start |= (inc << 12);
        inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
        end |= (inc << 12);
        inc = (0x1ul << 12);
        mb();

        while (start <= end) {
                __raw_writeq(start, invalidate);
                start += inc;
        }
}

void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
                                 u64 *startp, u64 *endp)
{
        struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
                                              tce32_table);
        struct pnv_phb *phb = pe->phb;

        if (phb->type == PNV_PHB_IODA1)
                pnv_pci_ioda1_tce_invalidate(tbl, startp, endp);
        else
                pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp);
}

static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                      struct pnv_ioda_pe *pe, unsigned int base,
                                      unsigned int segs)
{
        struct page *tce_mem = NULL;
        const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int i;
        int64_t rc;
        void *addr;

        /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE        ((0x10000000 / 0x1000) * 8)
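/* i.e. 65536 TCEs of 8 bytes each: 512KB of table per 256MB segment */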

        /* XXX FIXME: Handle 64-bit only DMA devices */
        /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
        /* XXX FIXME: Allocate multi-level tables on PHB3 */

        /* We shouldn't already have a 32-bit DMA associated */
        if (WARN_ON(pe->tce32_seg >= 0))
                return;

        /* Grab a 32-bit TCE table */
        pe->tce32_seg = base;
        pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
                (base << 28), ((base + segs) << 28) - 1);

        /* XXX Currently, we allocate one big contiguous table for the
         * TCEs. We only really need one chunk per 256M of TCE space
         * (ie per segment) but that's an optimization for later, it
         * requires some added smarts with our get/put_tce implementation
         */
        tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                                   get_order(TCE32_TABLE_SIZE * segs));
        if (!tce_mem) {
                pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
                goto fail;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, TCE32_TABLE_SIZE * segs);

        /* Configure HW */
        for (i = 0; i < segs; i++) {
                rc = opal_pci_map_pe_dma_window(phb->opal_id,
                                              pe->pe_number,
                                              base + i, 1,
                                              __pa(addr) + TCE32_TABLE_SIZE * i,
                                              TCE32_TABLE_SIZE, 0x1000);
                if (rc) {
                        pe_err(pe, " Failed to configure 32-bit TCE table,"
                               " err %ld\n", rc);
                        goto fail;
                }
        }

        /* Setup linux iommu table */
        tbl = &pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
                                  base << 28);

        /* OPAL variant of P7IOC SW invalidated TCEs */
        swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
        if (swinvp) {
                /* We need a couple more fields -- an address and a data
                 * to or.  Since the bus is only printed out on table free
                 * errors, and on the first pass the data will be a relative
                 * bus number, print that out instead.
                 */
                tbl->it_busno = 0;
                tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
                               TCE_PCI_SWINV_PAIR;
        }
        iommu_init_table(tbl, phb->hose->node);
        iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);

        if (pe->pdev)
                set_iommu_table_base(&pe->pdev->dev, tbl);
        else
                pnv_ioda_setup_bus_dma(pe, pe->pbus);

        return;
 fail:
        /* XXX Failure: Try to fallback to 64-bit only ? */
        if (pe->tce32_seg >= 0)
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
                                       struct pnv_ioda_pe *pe)
{
        struct page *tce_mem = NULL;
        void *addr;
        const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int tce_table_size, end;
        int64_t rc;

        /* We shouldn't already have a 32-bit DMA associated */
        if (WARN_ON(pe->tce32_seg >= 0))
                return;

        /* The PE will reserve all possible 32-bit space */
        pe->tce32_seg = 0;
        end = (1 << ilog2(phb->ioda.m32_pci_base));
        tce_table_size = (end / 0x1000) * 8;
        pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
                end);

        /* Allocate TCE table */
        tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                                   get_order(tce_table_size));
        if (!tce_mem) {
                pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
                goto fail;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, tce_table_size);

        /*
         * Map TCE table through TVT. The TVE index is the PE number
         * shifted left by 1 bit for the 32-bit DMA space.
         */
        rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
                                        pe->pe_number << 1, 1, __pa(addr),
                                        tce_table_size, 0x1000);
        if (rc) {
                pe_err(pe, "Failed to configure 32-bit TCE table,"
                       " err %ld\n", rc);
                goto fail;
        }

        /* Setup linux iommu table */
        tbl = &pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);

        /* OPAL variant of PHB3 invalidated TCEs */
        swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
        if (swinvp) {
                /* We need a couple more fields -- an address and a data
                 * to or.  Since the bus is only printed out on table free
                 * errors, and on the first pass the data will be a relative
                 * bus number, print that out instead.
                 */
                tbl->it_busno = 0;
                tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
        }
        iommu_init_table(tbl, phb->hose->node);

        if (pe->pdev)
                set_iommu_table_base(&pe->pdev->dev, tbl);
        else
                pnv_ioda_setup_bus_dma(pe, pe->pbus);

        return;
fail:
        if (pe->tce32_seg >= 0)
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(tce_table_size));
}

static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
        struct pci_controller *hose = phb->hose;
        unsigned int residual, remaining, segs, tw, base;
        struct pnv_ioda_pe *pe;

        /* If we have more PE#s than segments available, hand out one
         * per PE until we run out and let the rest fail. If not,
         * then we assign at least one segment per PE, plus more based
         * on the number of devices under that PE
         */
        if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
                residual = 0;
        else
                residual = phb->ioda.tce32_count -
                        phb->ioda.dma_pe_count;

        pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
                hose->global_number, phb->ioda.tce32_count);
        pr_info("PCI: %d PE# for a total weight of %d\n",
                phb->ioda.dma_pe_count, phb->ioda.dma_weight);

        /* Walk our PE list and configure their DMA segments, hand them
         * out one base segment plus any residual segments based on
         * weight
         */
        remaining = phb->ioda.tce32_count;
        tw = phb->ioda.dma_weight;
        base = 0;
        list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
                if (!pe->dma_weight)
                        continue;
                if (!remaining) {
                        pe_warn(pe, "No DMA32 resources available\n");
                        continue;
                }
                segs = 1;
                if (residual) {
                        segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
                        if (segs > remaining)
                                segs = remaining;
                }
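                /*
                 * Illustrative arithmetic: with total weight tw = 40,
                 * residual = 6 and pe->dma_weight = 10, the PE gets
                 * 1 + (10 * 6 + 20) / 40 = 3 segments.
                 */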

                /*
                 * For an IODA2-compliant PHB3 we needn't care about the
                 * weight: all of the available 32-bit DMA space will be
                 * assigned to the PE.
                 */
                if (phb->type == PNV_PHB_IODA1) {
                        pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
                                pe->dma_weight, segs);
                        pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
                } else {
                        pe_info(pe, "Assign DMA32 space\n");
                        segs = 0;
                        pnv_pci_ioda2_setup_dma_pe(phb, pe);
                }

                remaining -= segs;
                base += segs;
        }
}

#ifdef CONFIG_PCI_MSI
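/*
 * Note: on IODA2 (PHB3) an MSI EOI must additionally be signalled to
 * firmware via opal_pci_msi_eoi() before the usual ICP EOI; the
 * per-PHB irq_chip set up in pnv_pci_ioda_msi_setup() routes irq_eoi
 * here for that purpose.
 */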
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
        unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
        struct irq_chip *chip = irq_data_get_irq_chip(d);
        struct pnv_phb *phb = container_of(chip, struct pnv_phb,
                                           ioda.irq_chip);
        int64_t rc;

        rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
        WARN_ON_ONCE(rc);

        icp_native_eoi(d);
}

static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
                                  unsigned int hwirq, unsigned int virq,
                                  unsigned int is_64, struct msi_msg *msg)
{
        struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
        struct pci_dn *pdn = pci_get_pdn(dev);
        struct irq_data *idata;
        struct irq_chip *ichip;
        unsigned int xive_num = hwirq - phb->msi_base;
        uint64_t addr64;
        uint32_t addr32, data;
        int rc;

        /* No PE assigned ? bail out ... no MSI for you ! */
        if (pe == NULL)
                return -ENXIO;

        /* Check if we have an MVE */
        if (pe->mve_number < 0)
                return -ENXIO;

        /* Force 32-bit MSI on some broken devices */
        if (pdn && pdn->force_32bit_msi)
                is_64 = 0;

        /* Assign XIVE to PE */
        rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
        if (rc) {
                pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
                        pci_name(dev), rc, xive_num);
                return -EIO;
        }

        if (is_64) {
                rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr64, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = addr64 >> 32;
                msg->address_lo = addr64 & 0xfffffffful;
        } else {
                rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr32, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = 0;
                msg->address_lo = addr32;
        }
        msg->data = data;

        /*
         * Change the IRQ chip for the MSI interrupts on PHB3.
         * The corresponding IRQ chip is populated the first
         * time through here.
         */
        if (phb->type == PNV_PHB_IODA2) {
                if (!phb->ioda.irq_chip_init) {
                        idata = irq_get_irq_data(virq);
                        ichip = irq_data_get_irq_chip(idata);
                        phb->ioda.irq_chip_init = 1;
                        phb->ioda.irq_chip = *ichip;
                        phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
                }

                irq_set_chip(virq, &phb->ioda.irq_chip);
        }

        pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
                 " address=%x_%08x data=%x PE# %d\n",
                 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
                 msg->address_hi, msg->address_lo, data, pe->pe_number);

        return 0;
}

static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
        unsigned int count;
        const __be32 *prop = of_get_property(phb->hose->dn,
                                             "ibm,opal-msi-ranges", NULL);
        if (!prop) {
                /* BML Fallback */
                prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
        }
        if (!prop)
                return;

        phb->msi_base = be32_to_cpup(prop);
        count = be32_to_cpup(prop + 1);
        if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
                pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
                       phb->hose->global_number);
                return;
        }

        phb->msi_setup = pnv_pci_ioda_msi_setup;
        phb->msi32_support = 1;
        pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
                count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/*
 * This function is supposed to be called on a per-PE basis, from top
 * to bottom, so the I/O or MMIO segments assigned to a parent PE
 * can be overridden by its child PEs if necessary.
 */
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
                                  struct pnv_ioda_pe *pe)
{
        struct pnv_phb *phb = hose->private_data;
        struct pci_bus_region region;
        struct resource *res;
        int i, index;
        int rc;

        /*
         * NOTE: We only care about PCI-bus-based PEs for now.
         * PCI-device-based PEs, for example SR-IOV-sensitive VFs,
         * should be figured out later.
         */
        BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

        pci_bus_for_each_resource(pe->pbus, res, i) {
                if (!res || !res->flags ||
                    res->start > res->end)
                        continue;

                if (res->flags & IORESOURCE_IO) {
                        region.start = res->start - phb->ioda.io_pci_base;
                        region.end   = res->end - phb->ioda.io_pci_base;
                        index = region.start / phb->ioda.io_segsize;

                        while (index < phb->ioda.total_pe &&
                               region.start <= region.end) {
                                phb->ioda.io_segmap[index] = pe->pe_number;
                                rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                                        pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
                                if (rc != OPAL_SUCCESS) {
                                        pr_err("%s: OPAL error %d when mapping IO "
                                               "segment #%d to PE#%d\n",
                                               __func__, rc, index, pe->pe_number);
                                        break;
                                }

                                region.start += phb->ioda.io_segsize;
                                index++;
                        }
                } else if (res->flags & IORESOURCE_MEM) {
                        /* WARNING: Assumes M32 is mem region 0 in PHB. We need to
                         * harden that algorithm when we start supporting M64
                         */
                        region.start = res->start -
                                       hose->mem_offset[0] -
                                       phb->ioda.m32_pci_base;
                        region.end   = res->end -
                                       hose->mem_offset[0] -
                                       phb->ioda.m32_pci_base;
                        index = region.start / phb->ioda.m32_segsize;

                        while (index < phb->ioda.total_pe &&
                               region.start <= region.end) {
                                phb->ioda.m32_segmap[index] = pe->pe_number;
                                rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                                        pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
                                if (rc != OPAL_SUCCESS) {
                                        pr_err("%s: OPAL error %d when mapping M32 "
                                               "segment#%d to PE#%d",
                                               __func__, rc, index, pe->pe_number);
                                        break;
                                }

                                region.start += phb->ioda.m32_segsize;
                                index++;
                        }
                }
        }
}

static void pnv_pci_ioda_setup_seg(void)
{
        struct pci_controller *tmp, *hose;
        struct pnv_phb *phb;
        struct pnv_ioda_pe *pe;

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;
                list_for_each_entry(pe, &phb->ioda.pe_list, list) {
                        pnv_ioda_setup_pe_seg(hose, pe);
                }
        }
}

static void pnv_pci_ioda_setup_DMA(void)
{
        struct pci_controller *hose, *tmp;
        struct pnv_phb *phb;

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                pnv_ioda_setup_dma(hose->private_data);

                /* Mark the PHB initialization done */
                phb = hose->private_data;
                phb->initialized = 1;
        }
}

static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
        struct pci_controller *hose, *tmp;
        struct pnv_phb *phb;
        char name[16];

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;

                sprintf(name, "PCI%04x", hose->global_number);
                phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
                if (!phb->dbgfs)
                        pr_warning("%s: Failed to create debugfs on PHB#%x\n",
                                __func__, hose->global_number);
        }
#endif /* CONFIG_DEBUG_FS */
}

static void pnv_pci_ioda_fixup(void)
{
        pnv_pci_ioda_setup_PEs();
        pnv_pci_ioda_setup_seg();
        pnv_pci_ioda_setup_DMA();

        pnv_pci_ioda_create_dbgfs();

#ifdef CONFIG_EEH
        eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
        eeh_addr_cache_build();
        eeh_init();
#endif
}

/*
 * Returns the alignment for I/O or memory windows of P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return the I/O or M32 segment size for PE-sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE that was
 * created against the parent PCI bridge. In that case we
 * needn't enlarge the alignment, which saves some resources.
 */
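/*
 * Example of the logic below: a bus directly behind a single PCIe
 * switch downstream port gets m32_segsize (M32) or io_segsize (I/O)
 * alignment so that it can own a PE, while a bus behind two or more
 * PCIe-to-PCI bridges returns 1 (no extra constraint), since it will
 * share a PE created against an ancestor bridge anyway.
 */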
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
                                                unsigned long type)
{
        struct pci_dev *bridge;
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        int num_pci_bridges = 0;

        bridge = bus->self;
        while (bridge) {
                if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
                        num_pci_bridges++;
                        if (num_pci_bridges >= 2)
                                return 1;
                }

                bridge = bridge->bus->self;
        }

        /* We need to support prefetchable memory windows later */
        if (type & IORESOURCE_MEM)
                return phb->ioda.m32_segsize;

        return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn;

        /* The function is probably called while the PEs have
         * not been created yet, for example during resource
         * reassignment in the PCI probe period. We just skip
         * the check if the PEs aren't ready.
         */
        if (!phb->initialized)
                return 0;

        pdn = pci_get_pdn(dev);
        if (!pdn || pdn->pe_number == IODA_INVALID_PE)
                return -EINVAL;

        return 0;
}

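/*
 * Reverse RID -> PE# lookup through the pe_rmap[] array populated in
 * pnv_ioda_configure_pe(). The RID is (bus << 8) | devfn; e.g. bus
 * 0x02, device 2, function 0 (devfn 0x10) yields RID 0x0210.
 */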
static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
                               u32 devfn)
{
        return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
        opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
                       OPAL_ASSERT_RESET);
}

void __init pnv_pci_init_ioda_phb(struct device_node *np,
                                  u64 hub_id, int ioda_type)
{
        struct pci_controller *hose;
        struct pnv_phb *phb;
        unsigned long size, m32map_off, iomap_off, pemap_off;
        const u64 *prop64;
        const u32 *prop32;
        int len;
        u64 phb_id;
        void *aux;
        long rc;

        pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

        prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
        if (!prop64) {
                pr_err("  Missing \"ibm,opal-phbid\" property !\n");
                return;
        }
        phb_id = be64_to_cpup(prop64);
        pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);

        phb = alloc_bootmem(sizeof(struct pnv_phb));
        if (!phb) {
                pr_err("  Out of memory !\n");
                return;
        }

        /* Allocate PCI controller */
        memset(phb, 0, sizeof(struct pnv_phb));
        phb->hose = hose = pcibios_alloc_controller(np);
        if (!phb->hose) {
                pr_err("  Can't allocate PCI controller for %s\n",
                       np->full_name);
                free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
                return;
        }

        spin_lock_init(&phb->lock);
        prop32 = of_get_property(np, "bus-range", &len);
        if (prop32 && len == 8) {
                hose->first_busno = prop32[0];
                hose->last_busno = prop32[1];
        } else {
                pr_warn("  Broken <bus-range> on %s\n", np->full_name);
                hose->first_busno = 0;
                hose->last_busno = 0xff;
        }
        hose->private_data = phb;
        phb->hub_id = hub_id;
        phb->opal_id = phb_id;
        phb->type = ioda_type;

        /* Detect specific models for error handling */
        if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
                phb->model = PNV_PHB_MODEL_P7IOC;
        else if (of_device_is_compatible(np, "ibm,power8-pciex"))
                phb->model = PNV_PHB_MODEL_PHB3;
        else
                phb->model = PNV_PHB_MODEL_UNKNOWN;

        /* Parse 32-bit and IO ranges (if any) */
        pci_process_bridge_OF_ranges(hose, np, !hose->global_number);

        /* Get registers */
        phb->regs = of_iomap(np, 0);
        if (phb->regs == NULL)
                pr_err("  Failed to map registers !\n");

        /* Initialize more IODA stuff */
        prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
        if (!prop32)
                phb->ioda.total_pe = 1;
        else
                phb->ioda.total_pe = *prop32;

        phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
        /* FW has already taken the top 64K of M32 space (MSI space) off */
        phb->ioda.m32_size += 0x10000;

        phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
        phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
        phb->ioda.io_size = hose->pci_io_size;
        phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
        phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

        /* Allocate aux data & arrays. We don't have IO ports on PHB3 */
        size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
        m32map_off = size;
        size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
        if (phb->type == PNV_PHB_IODA1) {
                iomap_off = size;
                size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
        }
        pemap_off = size;
        size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
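        /*
         * Resulting layout of the single allocation (illustrative):
         *
         *   aux + 0           PE allocation bitmap  (pe_alloc)
         *   aux + m32map_off  M32 segment map       (m32_segmap)
         *   aux + iomap_off   IO segment map        (io_segmap, IODA1 only)
         *   aux + pemap_off   PE array              (pe_array)
         */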
        aux = alloc_bootmem(size);
        memset(aux, 0, size);
        phb->ioda.pe_alloc = aux;
        phb->ioda.m32_segmap = aux + m32map_off;
        if (phb->type == PNV_PHB_IODA1)
                phb->ioda.io_segmap = aux + iomap_off;
        phb->ioda.pe_array = aux + pemap_off;
        set_bit(0, phb->ioda.pe_alloc);

        INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
        INIT_LIST_HEAD(&phb->ioda.pe_list);

        /* Calculate how many 32-bit TCE segments we have */
        phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
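        /* e.g. a 2GB M32 window (m32_pci_base = 0x80000000) gives 8 segments */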

        /* Clear unusable m64 */
        hose->mem_resources[1].flags = 0;
        hose->mem_resources[1].start = 0;
        hose->mem_resources[1].end = 0;
        hose->mem_resources[2].flags = 0;
        hose->mem_resources[2].start = 0;
        hose->mem_resources[2].end = 0;

#if 0 /* We should really do that ... */
        rc = opal_pci_set_phb_mem_window(opal->phb_id,
                                         window_type,
                                         window_num,
                                         starting_real_address,
                                         starting_pci_address,
                                         segment_size);
#endif

        pr_info("  %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
                phb->ioda.total_pe,
                phb->ioda.m32_size, phb->ioda.m32_segsize,
                phb->ioda.io_size, phb->ioda.io_segsize);

        phb->hose->ops = &pnv_pci_ops;
#ifdef CONFIG_EEH
        phb->eeh_ops = &ioda_eeh_ops;
#endif

        /* Setup RID -> PE mapping function */
        phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

        /* Setup TCEs */
        phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;

        /* Setup shutdown function for kexec */
        phb->shutdown = pnv_pci_ioda_shutdown;

        /* Setup MSI support */
        pnv_pci_init_ioda_msis(phb);

        /*
         * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
         * to let the PCI core do resource assignment. The expectation
         * is that the PCI core will do correct I/O and MMIO alignment
         * for the P2P bridge BARs, so that each PCI bus (excluding
         * the child P2P bridges) can form an individual PE.
         */
        ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
        ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
        ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
        pci_add_flags(PCI_REASSIGN_ALL_RSRC);

        /* Reset IODA tables to a clean state */
        rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
        if (rc)
                pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);

        /*
         * On IODA1 map everything to PE#0; on IODA2 we assume the IODA reset
         * has cleared the RTT, which has the same effect
         */
        if (ioda_type == PNV_PHB_IODA1)
                opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1, OPAL_MAP_PE);
}

void __init pnv_pci_init_ioda2_phb(struct device_node *np)
{
        pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
        struct device_node *phbn;
        const u64 *prop64;
        u64 hub_id;

        pr_info("Probing IODA IO-Hub %s\n", np->full_name);

        prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
        if (!prop64) {
                pr_err(" Missing \"ibm,opal-hubid\" property !\n");
                return;
        }
        hub_id = be64_to_cpup(prop64);
        pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

        /* Count child PHBs */
        for_each_child_of_node(np, phbn) {
                /* Look for IODA1 PHBs */
                if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
                        pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
        }
}