linux/arch/powerpc/platforms/pseries/iommu.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 *
 * Rewrite, cleanup:
 *
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
 *
 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/of.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/tce.h>
#include <asm/ppc-pci.h>
#include <asm/udbg.h>
#include <asm/mmzone.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"

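/*
 * Background note (summary, not from the upstream source): on pSeries,
 * device DMA is translated through TCE (Translation Control Entry)
 * tables. Each 64-bit TCE maps one IOMMU page of I/O bus address space
 * to a real page and carries read/write permission bits. On bare metal
 * the kernel writes the table directly; under an LPAR the hypervisor
 * owns the tables and they are updated through the H_PUT_TCE family of
 * hcalls.
 */
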
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
	struct iommu_table_group *table_group;
	struct iommu_table *tbl;

	table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
			   node);
	if (!table_group)
		return NULL;

	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
	if (!tbl)
		goto free_group;

	INIT_LIST_HEAD_RCU(&tbl->it_group_list);
	kref_init(&tbl->it_kref);

	table_group->tables[0] = tbl;

	return table_group;

free_group:
	kfree(table_group);
	return NULL;
}

static void iommu_pseries_free_group(struct iommu_table_group *table_group,
		const char *node_name)
{
	struct iommu_table *tbl;

	if (!table_group)
		return;

	tbl = table_group->tables[0];
#ifdef CONFIG_IOMMU_API
	if (table_group->group) {
		iommu_group_put(table_group->group);
		BUG_ON(table_group->group);
	}
#endif
	iommu_tce_table_put(tbl);

	kfree(table_group);
}

static int tce_build_pSeries(struct iommu_table *tbl, long index,
			      long npages, unsigned long uaddr,
			      enum dma_data_direction direction,
			      unsigned long attrs)
{
	u64 proto_tce;
	__be64 *tcep;
	u64 rpn;

	proto_tce = TCE_PCI_READ; /* Read allowed */

	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	tcep = ((__be64 *)tbl->it_base) + index;

	while (npages--) {
		/* can't move this out since we might cross MEMBLOCK boundary */
		rpn = __pa(uaddr) >> TCE_SHIFT;
		*tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);

		uaddr += TCE_PAGE_SIZE;
		tcep++;
	}
	return 0;
}

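/*
 * Worked example (illustrative): with TCE_SHIFT == TCE_RPN_SHIFT == 12, a
 * bidirectional mapping of a buffer at physical address 0x23456000 gives
 * rpn == 0x23456 and a TCE value of
 *	(0x23456 << 12) | TCE_PCI_READ | TCE_PCI_WRITE == 0x23456003
 * which tce_build_pSeries() above stores as a big-endian u64 at
 * tbl->it_base + index.
 */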

static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
	__be64 *tcep;

	tcep = ((__be64 *)tbl->it_base) + index;

	while (npages--)
		*(tcep++) = 0;
}

static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
{
	__be64 *tcep;

	tcep = ((__be64 *)tbl->it_base) + index;

	return be64_to_cpu(*tcep);
}

static void tce_free_pSeriesLP(struct iommu_table*, long, long);
static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);

static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
				long npages, unsigned long uaddr,
				enum dma_data_direction direction,
				unsigned long attrs)
{
	u64 rc = 0;
	u64 proto_tce, tce;
	u64 rpn;
	int ret = 0;
	long tcenum_start = tcenum, npages_start = npages;

	rpn = __pa(uaddr) >> TCE_SHIFT;
	proto_tce = TCE_PCI_READ;
	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	while (npages--) {
		tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);

		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
			ret = (int)rc;
			tce_free_pSeriesLP(tbl, tcenum_start,
					   (npages_start - (npages + 1)));
			break;
		}

		if (rc && printk_ratelimit()) {
			printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
			printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
			printk("\ttce val = 0x%llx\n", tce);
			dump_stack();
		}

		tcenum++;
		rpn++;
	}
	return ret;
}

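/*
 * Note (illustrative): the second argument to plpar_tce_put() above is an
 * I/O bus address in bytes, which is why a 4K-TCE index is shifted left
 * by 12; e.g. tcenum 5 addresses ioba 0x5000 within the window.
 */
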
static DEFINE_PER_CPU(__be64 *, tce_page);

static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
				     long npages, unsigned long uaddr,
				     enum dma_data_direction direction,
				     unsigned long attrs)
{
	u64 rc = 0;
	u64 proto_tce;
	__be64 *tcep;
	u64 rpn;
	long l, limit;
	long tcenum_start = tcenum, npages_start = npages;
	int ret = 0;
	unsigned long flags;

	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
					   direction, attrs);
	}

	local_irq_save(flags);	/* to protect tcep and the page behind it */

	tcep = __this_cpu_read(tce_page);

	/* This is safe to do since interrupts are off when we're called
	 * from iommu_alloc{,_sg}()
	 */
	if (!tcep) {
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
		/* If allocation fails, fall back to the loop implementation */
		if (!tcep) {
			local_irq_restore(flags);
			return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
					    direction, attrs);
		}
		__this_cpu_write(tce_page, tcep);
	}

	rpn = __pa(uaddr) >> TCE_SHIFT;
	proto_tce = TCE_PCI_READ;
	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	/* We can map at most one page of TCEs at a time */
	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);

		for (l = 0; l < limit; l++) {
			tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
			rpn++;
		}

		rc = plpar_tce_put_indirect((u64)tbl->it_index,
					    (u64)tcenum << 12,
					    (u64)__pa(tcep),
					    limit);

		npages -= limit;
		tcenum += limit;
	} while (npages > 0 && !rc);

	local_irq_restore(flags);

	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
		ret = (int)rc;
		tce_freemulti_pSeriesLP(tbl, tcenum_start,
					(npages_start - (npages + limit)));
		return ret;
	}

	if (rc && printk_ratelimit()) {
		printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
		printk("\tnpages  = 0x%llx\n", (u64)npages);
		printk("\ttce[0] val = 0x%llx\n", tcep[0]);
		dump_stack();
	}
	return ret;
}

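/*
 * Sizing note (arithmetic from the code above): one 4K scratch page holds
 * 4096 / TCE_ENTRY_SIZE == 512 TCEs, so tce_buildmulti_pSeriesLP() issues
 * a single H_PUT_TCE_INDIRECT hcall for every 512 pages mapped instead of
 * 512 individual H_PUT_TCE calls.
 */
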
static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
	u64 rc;

	while (npages--) {
		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);

		if (rc && printk_ratelimit()) {
			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
			printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
			dump_stack();
		}

		tcenum++;
	}
}

static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
	u64 rc;

	if (!firmware_has_feature(FW_FEATURE_MULTITCE))
		return tce_free_pSeriesLP(tbl, tcenum, npages);

	rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);

	if (rc && printk_ratelimit()) {
		printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
		printk("\trc      = %lld\n", rc);
		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
		printk("\tnpages  = 0x%llx\n", (u64)npages);
		dump_stack();
	}
}

static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
{
	u64 rc;
	unsigned long tce_ret;

	rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);

	if (rc && printk_ratelimit()) {
		printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
		printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
		dump_stack();
	}

	return tce_ret;
}

/* This layout matches the cells of the device tree property. */
struct dynamic_dma_window_prop {
	__be32	liobn;		/* tce table number */
	__be64	dma_base;	/* address hi,lo */
	__be32	tce_shift;	/* ilog2(tce_page_size) */
	__be32	window_shift;	/* ilog2(tce_window_size) */
};

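/*
 * Example encoding (illustrative): a 32GB direct window of 64K TCE pages
 * starting at DMA address 0x8000000000 would be stored with
 * tce_shift == cpu_to_be32(16), window_shift == cpu_to_be32(35) and
 * dma_base == cpu_to_be64(0x8000000000).
 */
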
struct direct_window {
	struct device_node *device;
	const struct dynamic_dma_window_prop *prop;
	struct list_head list;
};

/* Dynamic DMA Window support */
struct ddw_query_response {
	u32 windows_available;
	u32 largest_available_block;
	u32 page_size;
	u32 migration_capable;
};

struct ddw_create_response {
	u32 liobn;
	u32 addr_hi;
	u32 addr_lo;
};

static LIST_HEAD(direct_window_list);
/* prevents races between memory on/offline and window creation */
static DEFINE_SPINLOCK(direct_window_list_lock);
/* protects initializing window twice for same device */
static DEFINE_MUTEX(direct_window_init_mutex);
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"

static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
					unsigned long num_pfn, const void *arg)
{
	const struct dynamic_dma_window_prop *maprange = arg;
	int rc;
	u64 tce_size, num_tce, dma_offset, next;
	u32 tce_shift;
	long limit;

	tce_shift = be32_to_cpu(maprange->tce_shift);
	tce_size = 1ULL << tce_shift;
	next = start_pfn << PAGE_SHIFT;
	num_tce = num_pfn << PAGE_SHIFT;

	/* round back to the beginning of the tce page size */
	num_tce += next & (tce_size - 1);
	next &= ~(tce_size - 1);

	/* convert to number of TCEs */
	num_tce |= tce_size - 1;
	num_tce >>= tce_shift;

	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, num_tce, 512);
		dma_offset = next + be64_to_cpu(maprange->dma_base);

		rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
					     dma_offset,
					     0, limit);
		next += limit * tce_size;
		num_tce -= limit;
	} while (num_tce > 0 && !rc);

	return rc;
}

static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
					unsigned long num_pfn, const void *arg)
{
	const struct dynamic_dma_window_prop *maprange = arg;
	u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
	__be64 *tcep;
	u32 tce_shift;
	u64 rc = 0;
	long l, limit;

	local_irq_disable();	/* to protect tcep and the page behind it */
	tcep = __this_cpu_read(tce_page);

	if (!tcep) {
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
		if (!tcep) {
			local_irq_enable();
			return -ENOMEM;
		}
		__this_cpu_write(tce_page, tcep);
	}

	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;

	liobn = (u64)be32_to_cpu(maprange->liobn);
	tce_shift = be32_to_cpu(maprange->tce_shift);
	tce_size = 1ULL << tce_shift;
	next = start_pfn << PAGE_SHIFT;
	num_tce = num_pfn << PAGE_SHIFT;

	/* round back to the beginning of the tce page size */
	num_tce += next & (tce_size - 1);
	next &= ~(tce_size - 1);

	/* convert to number of TCEs */
	num_tce |= tce_size - 1;
	num_tce >>= tce_shift;

	/* We can map at most one page of TCEs at a time */
	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
		dma_offset = next + be64_to_cpu(maprange->dma_base);

		for (l = 0; l < limit; l++) {
			tcep[l] = cpu_to_be64(proto_tce | next);
			next += tce_size;
		}

		rc = plpar_tce_put_indirect(liobn,
					    dma_offset,
					    (u64)__pa(tcep),
					    limit);

		num_tce -= limit;
	} while (num_tce > 0 && !rc);

	/* error cleanup: caller will clear whole range */

	local_irq_enable();
	return rc;
}

static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
}

static void iommu_table_setparms(struct pci_controller *phb,
				 struct device_node *dn,
				 struct iommu_table *tbl)
{
	struct device_node *node;
	const unsigned long *basep;
	const u32 *sizep;

	node = phb->dn;

	basep = of_get_property(node, "linux,tce-base", NULL);
	sizep = of_get_property(node, "linux,tce-size", NULL);
	if (basep == NULL || sizep == NULL) {
		printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
				"missing tce entries!\n", dn);
		return;
	}

	tbl->it_base = (unsigned long)__va(*basep);

	if (!is_kdump_kernel())
		memset((void *)tbl->it_base, 0, *sizep);

	tbl->it_busno = phb->bus->number;
	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;

	/* Units of tce entries */
	tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift;

	/* Test if we are going over 2GB of DMA space */
	if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
		udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
		panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
	}

	phb->dma_window_base_cur += phb->dma_window_size;

	/* Set the tce table size - measured in entries */
	tbl->it_size = phb->dma_window_size >> tbl->it_page_shift;

	tbl->it_index = 0;
	tbl->it_blocksize = 16;
	tbl->it_type = TCE_PCI;
}

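/*
 * Worked example (illustrative): for a 128MB slot (dma_window_size ==
 * 0x8000000) with 4K IOMMU pages, it_size == 0x8000000 >> 12 == 32768
 * TCE entries, and a window based at 0x8000000 gives it_offset == 0x8000.
 */
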
/*
 * iommu_table_setparms_lpar
 *
 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
 */
static void iommu_table_setparms_lpar(struct pci_controller *phb,
				      struct device_node *dn,
				      struct iommu_table *tbl,
				      struct iommu_table_group *table_group,
				      const __be32 *dma_window)
{
	unsigned long offset, size;

	of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);

	tbl->it_busno = phb->bus->number;
	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
	tbl->it_base   = 0;
	tbl->it_blocksize  = 16;
	tbl->it_type = TCE_PCI;
	tbl->it_offset = offset >> tbl->it_page_shift;
	tbl->it_size = size >> tbl->it_page_shift;

	table_group->tce32_start = offset;
	table_group->tce32_size = size;
}

struct iommu_table_ops iommu_table_pseries_ops = {
	.set = tce_build_pSeries,
	.clear = tce_free_pSeries,
	.get = tce_get_pseries
};

static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
{
	struct device_node *dn;
	struct iommu_table *tbl;
	struct device_node *isa_dn, *isa_dn_orig;
	struct device_node *tmp;
	struct pci_dn *pci;
	int children;

	dn = pci_bus_to_OF_node(bus);

	pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);

	if (bus->self) {
		/* This is not a root bus, any setup will be done for the
		 * device-side of the bridge in iommu_dev_setup_pSeries().
		 */
		return;
	}
	pci = PCI_DN(dn);

	/* Check if the ISA bus on the system is under
	 * this PHB.
	 */
	isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");

	while (isa_dn && isa_dn != dn)
		isa_dn = isa_dn->parent;

	of_node_put(isa_dn_orig);

	/* Count number of direct PCI children of the PHB. */
	for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
		children++;

	pr_debug("Children: %d\n", children);

	/* Calculate amount of DMA window per slot. Each window must be
	 * a power of two (due to pci_alloc_consistent requirements).
	 *
	 * Keep 256MB aside for PHBs with ISA.
	 */

	if (!isa_dn) {
		/* No ISA/IDE - just set window size and return */
		pci->phb->dma_window_size = 0x80000000ul; /* To be divided */

		while (pci->phb->dma_window_size * children > 0x80000000ul)
			pci->phb->dma_window_size >>= 1;
		pr_debug("No ISA/IDE, window size is 0x%llx\n",
			 pci->phb->dma_window_size);
		pci->phb->dma_window_base_cur = 0;

		return;
	}

	/* If we have ISA, then we probably have an IDE
	 * controller too. Allocate a 128MB table but
	 * skip the first 128MB to avoid stepping on ISA
	 * space.
	 */
	pci->phb->dma_window_size = 0x8000000ul;
	pci->phb->dma_window_base_cur = 0x8000000ul;

	pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
	tbl = pci->table_group->tables[0];

	iommu_table_setparms(pci->phb, dn, tbl);
	tbl->it_ops = &iommu_table_pseries_ops;
	iommu_init_table(tbl, pci->phb->node);

	/* Divide the rest (1.75GB) among the children */
	pci->phb->dma_window_size = 0x80000000ul;
	while (pci->phb->dma_window_size * children > 0x70000000ul)
		pci->phb->dma_window_size >>= 1;

	pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
}

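/*
 * Worked example (illustrative): with an ISA bridge present and four PHB
 * children, the final loop above halves 0x80000000 until 4 * size fits in
 * 0x70000000, leaving each child a 256MB (0x10000000) DMA window.
 */
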
#ifdef CONFIG_IOMMU_API
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
				long *tce, enum dma_data_direction *direction)
{
	long rc;
	unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
	unsigned long flags, oldtce = 0;
	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
	unsigned long newtce = *tce | proto_tce;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);

	rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
	if (!rc)
		rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);

	if (!rc) {
		*direction = iommu_tce_direction(oldtce);
		*tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	}

	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

	return rc;
}
#endif

struct iommu_table_ops iommu_table_lpar_multi_ops = {
	.set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
	.exchange = tce_exchange_pseries,
#endif
	.clear = tce_freemulti_pSeriesLP,
	.get = tce_get_pSeriesLP
};

static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
{
	struct iommu_table *tbl;
	struct device_node *dn, *pdn;
	struct pci_dn *ppci;
	const __be32 *dma_window = NULL;

	dn = pci_bus_to_OF_node(bus);

	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
		 dn);

	/* Find nearest ibm,dma-window, walking up the device tree */
	for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
		if (dma_window != NULL)
			break;
	}

	if (dma_window == NULL) {
		pr_debug("  no ibm,dma-window property!\n");
		return;
	}

	ppci = PCI_DN(pdn);

	pr_debug("  parent is %pOF, iommu_table: 0x%p\n",
		 pdn, ppci->table_group);

	if (!ppci->table_group) {
		ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
		tbl = ppci->table_group->tables[0];
		iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
				ppci->table_group, dma_window);
		tbl->it_ops = &iommu_table_lpar_multi_ops;
		iommu_init_table(tbl, ppci->phb->node);
		iommu_register_group(ppci->table_group,
				pci_domain_nr(bus), 0);
		pr_debug("  created table: %p\n", ppci->table_group);
	}
}

static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
{
	struct device_node *dn;
	struct iommu_table *tbl;

	pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));

	dn = dev->dev.of_node;

	/* If we're the direct child of a root bus, then we need to allocate
	 * an iommu table ourselves. The bus setup code should have setup
	 * the window sizes already.
	 */
	if (!dev->bus->self) {
		struct pci_controller *phb = PCI_DN(dn)->phb;

		pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
		PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
		tbl = PCI_DN(dn)->table_group->tables[0];
		iommu_table_setparms(phb, dn, tbl);
		tbl->it_ops = &iommu_table_pseries_ops;
		iommu_init_table(tbl, phb->node);
		set_iommu_table_base(&dev->dev, tbl);
		return;
	}

	/* If this device is further down the bus tree, search upwards until
	 * an already allocated iommu table is found and use that.
	 */

	while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
		dn = dn->parent;

	if (dn && PCI_DN(dn))
		set_iommu_table_base(&dev->dev,
				PCI_DN(dn)->table_group->tables[0]);
	else
		printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
		       pci_name(dev));
}

static int __read_mostly disable_ddw;

static int __init disable_ddw_setup(char *str)
{
	disable_ddw = 1;
	printk(KERN_INFO "ppc iommu: disabling ddw.\n");

	return 0;
}

early_param("disable_ddw", disable_ddw_setup);

static void remove_ddw(struct device_node *np, bool remove_prop)
{
	struct dynamic_dma_window_prop *dwp;
	struct property *win64;
	u32 ddw_avail[3];
	u64 liobn;
	int ret = 0;

	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
					 &ddw_avail[0], 3);

	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
	if (!win64)
		return;

	if (ret || win64->length < sizeof(*dwp))
		goto delprop;

	dwp = win64->value;
	liobn = (u64)be32_to_cpu(dwp->liobn);

	/* clear the whole window, note the arg is in kernel pages */
	ret = tce_clearrange_multi_pSeriesLP(0,
		1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
	if (ret)
		pr_warn("%pOF failed to clear tces in window.\n",
			np);
	else
		pr_debug("%pOF successfully cleared tces in window.\n",
			 np);

	ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
	if (ret)
		pr_warn("%pOF: failed to remove direct window: rtas returned "
			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
			np, ret, ddw_avail[2], liobn);
	else
		pr_debug("%pOF: successfully removed direct window: rtas returned "
			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
			np, ret, ddw_avail[2], liobn);

delprop:
	if (remove_prop)
		ret = of_remove_property(np, win64);
	if (ret)
		pr_warn("%pOF: failed to remove direct window property: %d\n",
			np, ret);
}

static u64 find_existing_ddw(struct device_node *pdn)
{
	struct direct_window *window;
	const struct dynamic_dma_window_prop *direct64;
	u64 dma_addr = 0;

	spin_lock(&direct_window_list_lock);
	/* check if we already created a window and dupe that config if so */
	list_for_each_entry(window, &direct_window_list, list) {
		if (window->device == pdn) {
			direct64 = window->prop;
			dma_addr = be64_to_cpu(direct64->dma_base);
			break;
		}
	}
	spin_unlock(&direct_window_list_lock);

	return dma_addr;
}

static int find_existing_ddw_windows(void)
{
	int len;
	struct device_node *pdn;
	struct direct_window *window;
	const struct dynamic_dma_window_prop *direct64;

	if (!firmware_has_feature(FW_FEATURE_LPAR))
		return 0;

	for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
		direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
		if (!direct64)
			continue;

		window = kzalloc(sizeof(*window), GFP_KERNEL);
		if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
			kfree(window);
			remove_ddw(pdn, true);
			continue;
		}

		window->device = pdn;
		window->prop = direct64;
		spin_lock(&direct_window_list_lock);
		list_add(&window->list, &direct_window_list);
		spin_unlock(&direct_window_list_lock);
	}

	return 0;
}
machine_arch_initcall(pseries, find_existing_ddw_windows);

static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
			struct ddw_query_response *query)
{
	struct device_node *dn;
	struct pci_dn *pdn;
	u32 cfg_addr;
	u64 buid;
	int ret;

	/*
	 * Get the config address and phb buid of the PE window.
	 * Rely on eeh to retrieve this for us.
	 * Retrieve them from the pci device, not the node with the
	 * dma-window property
	 */
	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	buid = pdn->phb->buid;
	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

	ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
		  cfg_addr, BUID_HI(buid), BUID_LO(buid));
	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
		" returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
		BUID_LO(buid), ret);
	return ret;
}

static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
			struct ddw_create_response *create, int page_shift,
			int window_shift)
{
	struct device_node *dn;
	struct pci_dn *pdn;
	u32 cfg_addr;
	u64 buid;
	int ret;

	/*
	 * Get the config address and phb buid of the PE window.
	 * Rely on eeh to retrieve this for us.
	 * Retrieve them from the pci device, not the node with the
	 * dma-window property
	 */
	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	buid = pdn->phb->buid;
	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

	do {
		/* extra outputs are LIOBN and dma-addr (hi, lo) */
		ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
				cfg_addr, BUID_HI(buid), BUID_LO(buid),
				page_shift, window_shift);
	} while (rtas_busy_delay(ret));
	dev_info(&dev->dev,
		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
		"(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
		 cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
		 window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);

	return ret;
}

struct failed_ddw_pdn {
	struct device_node *pdn;
	struct list_head list;
};

static LIST_HEAD(failed_ddw_pdn_list);

static phys_addr_t ddw_memory_hotplug_max(void)
{
	phys_addr_t max_addr = memory_hotplug_max();
	struct device_node *memory;

	for_each_node_by_type(memory, "memory") {
		unsigned long start, size;
		int n_mem_addr_cells, n_mem_size_cells, len;
		const __be32 *memcell_buf;

		memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		n_mem_addr_cells = of_n_addr_cells(memory);
		n_mem_size_cells = of_n_size_cells(memory);

		start = of_read_number(memcell_buf, n_mem_addr_cells);
		memcell_buf += n_mem_addr_cells;
		size = of_read_number(memcell_buf, n_mem_size_cells);
		memcell_buf += n_mem_size_cells;

		max_addr = max_t(phys_addr_t, max_addr, start + size);
	}

	return max_addr;
}

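/*
 * Illustrative example (assuming two address and two size cells): a
 * "memory" node with reg = <0x1 0x0 0x0 0x80000000> (start 4GB, size 2GB)
 * raises the returned maximum to at least 0x180000000, so a DDW sized
 * from it covers that hotplugged range.
 */
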
/*
 * If the PE supports dynamic dma windows, and there is space for a table
 * that can map all pages in a linear offset, then setup such a table,
 * and record the dma-offset in the struct device.
 *
 * dev: the pci device we are checking
 * pdn: the parent pe node with the ibm,dma-window property
 * Future: also check if we can remap the base window for our base page size
 *
 * returns the dma offset for use by the direct mapped DMA code.
 */
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
	int len, ret;
	struct ddw_query_response query;
	struct ddw_create_response create;
	int page_shift;
	u64 dma_addr, max_addr;
	struct device_node *dn;
	u32 ddw_avail[3];
	struct direct_window *window;
	struct property *win64;
	struct dynamic_dma_window_prop *ddwprop;
	struct failed_ddw_pdn *fpdn;

	mutex_lock(&direct_window_init_mutex);

	dma_addr = find_existing_ddw(pdn);
	if (dma_addr != 0)
		goto out_unlock;

	/*
	 * If we already went through this for a previous function of
	 * the same device and failed, we don't want to muck with the
	 * DMA window again, as it will race with in-flight operations
	 * and can lead to EEHs. The above mutex protects access to the
	 * list.
	 */
	list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
		if (fpdn->pdn == pdn)
			goto out_unlock;
	}

	/*
	 * the ibm,ddw-applicable property holds the tokens for:
	 * ibm,query-pe-dma-window
	 * ibm,create-pe-dma-window
	 * ibm,remove-pe-dma-window
	 * for the given node in that order.
	 * the property is actually in the parent, not the PE
	 */
	ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
					 &ddw_avail[0], 3);
	if (ret)
		goto out_failed;

	/*
	 * Query if there is a second window of size to map the
	 * whole partition.  Query returns number of windows, largest
	 * block assigned to PE (partition endpoint), and two bitmasks
	 * of page sizes: supported and supported for migrate-dma.
	 */
	dn = pci_device_to_OF_node(dev);
	ret = query_ddw(dev, ddw_avail, &query);
	if (ret != 0)
		goto out_failed;

	if (query.windows_available == 0) {
		/*
		 * no additional windows are available for this device.
		 * We might be able to reallocate the existing window,
		 * trading in for a larger page size.
		 */
		dev_dbg(&dev->dev, "no free dynamic windows");
		goto out_failed;
	}
	if (query.page_size & 4) {
		page_shift = 24; /* 16MB */
	} else if (query.page_size & 2) {
		page_shift = 16; /* 64kB */
	} else if (query.page_size & 1) {
		page_shift = 12; /* 4kB */
	} else {
		dev_dbg(&dev->dev, "no supported direct page size in mask %x",
			  query.page_size);
		goto out_failed;
	}
	/* verify the window * number of ptes will map the partition */
	/* check largest block * page size > max memory hotplug addr */
	max_addr = ddw_memory_hotplug_max();
	if (query.largest_available_block < (max_addr >> page_shift)) {
		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
			  "%llu-sized pages\n", max_addr, query.largest_available_block,
			  1ULL << page_shift);
		goto out_failed;
	}
	len = order_base_2(max_addr);
	win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
	if (!win64) {
		dev_info(&dev->dev,
			"couldn't allocate property for 64bit dma window\n");
		goto out_failed;
	}
	win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
	win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
	win64->length = sizeof(*ddwprop);
	if (!win64->name || !win64->value) {
		dev_info(&dev->dev,
			"couldn't allocate property name and value\n");
		goto out_free_prop;
	}

	ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
	if (ret != 0)
		goto out_free_prop;

	ddwprop->liobn = cpu_to_be32(create.liobn);
	ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) |
			create.addr_lo);
	ddwprop->tce_shift = cpu_to_be32(page_shift);
	ddwprop->window_shift = cpu_to_be32(len);

	dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
		  create.liobn, dn);

	window = kzalloc(sizeof(*window), GFP_KERNEL);
	if (!window)
		goto out_clear_window;

	ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
			win64->value, tce_setrange_multi_pSeriesLP_walk);
	if (ret) {
		dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
			 dn, ret);
		goto out_free_window;
	}

	ret = of_add_property(pdn, win64);
	if (ret) {
		dev_err(&dev->dev, "unable to add dma window property for %pOF: %d",
			 pdn, ret);
		goto out_free_window;
	}

	window->device = pdn;
	window->prop = ddwprop;
	spin_lock(&direct_window_list_lock);
	list_add(&window->list, &direct_window_list);
	spin_unlock(&direct_window_list_lock);

	dma_addr = be64_to_cpu(ddwprop->dma_base);
	goto out_unlock;

out_free_window:
	kfree(window);

out_clear_window:
	remove_ddw(pdn, true);

out_free_prop:
	kfree(win64->name);
	kfree(win64->value);
	kfree(win64);

out_failed:

	fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
	if (!fpdn)
		goto out_unlock;
	fpdn->pdn = pdn;
	list_add(&fpdn->list, &failed_ddw_pdn_list);

out_unlock:
	mutex_unlock(&direct_window_init_mutex);
	return dma_addr;
}

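/*
 * Sizing example (illustrative): for a partition whose hotplug-max address
 * is 16GB using 16MB TCE pages (page_shift == 24), enable_ddw() above
 * requires largest_available_block >= 16GB >> 24 == 1024 TCEs and asks
 * for a window of order_base_2(16GB) == 34 address bits.
 */
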
static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
{
	struct device_node *pdn, *dn;
	struct iommu_table *tbl;
	const __be32 *dma_window = NULL;
	struct pci_dn *pci;

	pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));

	/* dev setup for LPAR is a little tricky, since the device tree might
	 * contain the dma-window properties per-device and not necessarily
	 * for the bus. So we need to search upwards in the tree until we
	 * either hit a dma-window property, OR find a parent with a table
	 * already allocated.
	 */
	dn = pci_device_to_OF_node(dev);
	pr_debug("  node is %pOF\n", dn);

	for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
	     pdn = pdn->parent) {
		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
		if (dma_window)
			break;
	}

	if (!pdn || !PCI_DN(pdn)) {
		printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
		       "no DMA window found for pci dev=%s dn=%pOF\n",
				 pci_name(dev), dn);
		return;
	}
	pr_debug("  parent is %pOF\n", pdn);

	pci = PCI_DN(pdn);
	if (!pci->table_group) {
		pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
		tbl = pci->table_group->tables[0];
		iommu_table_setparms_lpar(pci->phb, pdn, tbl,
				pci->table_group, dma_window);
		tbl->it_ops = &iommu_table_lpar_multi_ops;
		iommu_init_table(tbl, pci->phb->node);
		iommu_register_group(pci->table_group,
				pci_domain_nr(pci->phb->bus), 0);
		pr_debug("  created table: %p\n", pci->table_group);
	} else {
		pr_debug("  found DMA window, table: %p\n", pci->table_group);
	}

	set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
	iommu_add_device(pci->table_group, &dev->dev);
}

static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
{
	struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
	const __be32 *dma_window = NULL;

	/* only attempt to use a new window if 64-bit DMA is requested */
	if (dma_mask < DMA_BIT_MASK(64))
		return false;

	dev_dbg(&pdev->dev, "node is %pOF\n", dn);

	/*
	 * the device tree might contain the dma-window properties
	 * per-device and not necessarily for the bus. So we need to
	 * search upwards in the tree until we either hit a dma-window
	 * property, OR find a parent with a table already allocated.
	 */
	for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
			pdn = pdn->parent) {
		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
		if (dma_window)
			break;
	}

	if (pdn && PCI_DN(pdn)) {
		pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
		if (pdev->dev.archdata.dma_offset)
			return true;
	}

	return false;
}

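/*
 * Note (behavioural summary, not in this file): when the hook above
 * returns true, the generic dma_iommu code switches the device to the
 * direct DMA path using dev.archdata.dma_offset, so its DMA no longer
 * goes through the 32-bit TCE table.
 */
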
static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
		void *data)
{
	struct direct_window *window;
	struct memory_notify *arg = data;
	int ret = 0;

	switch (action) {
	case MEM_GOING_ONLINE:
		spin_lock(&direct_window_list_lock);
		list_for_each_entry(window, &direct_window_list, list) {
			ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
					arg->nr_pages, window->prop);
			/* XXX log error */
		}
		spin_unlock(&direct_window_list_lock);
		break;
	case MEM_CANCEL_ONLINE:
	case MEM_OFFLINE:
		spin_lock(&direct_window_list_lock);
		list_for_each_entry(window, &direct_window_list, list) {
			ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
					arg->nr_pages, window->prop);
			/* XXX log error */
		}
		spin_unlock(&direct_window_list_lock);
		break;
	default:
		break;
	}
	if (ret && action != MEM_CANCEL_ONLINE)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}

static struct notifier_block iommu_mem_nb = {
	.notifier_call = iommu_mem_notifier,
};

static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
	int err = NOTIFY_OK;
	struct of_reconfig_data *rd = data;
	struct device_node *np = rd->dn;
	struct pci_dn *pci = PCI_DN(np);
	struct direct_window *window;

	switch (action) {
	case OF_RECONFIG_DETACH_NODE:
		/*
		 * Removing the property will invoke the reconfig
		 * notifier again, which causes dead-lock on the
		 * read-write semaphore of the notifier chain. So
		 * we have to remove the property when releasing
		 * the device node.
		 */
		remove_ddw(np, false);
		if (pci && pci->table_group)
			iommu_pseries_free_group(pci->table_group,
					np->full_name);

		spin_lock(&direct_window_list_lock);
		list_for_each_entry(window, &direct_window_list, list) {
			if (window->device == np) {
				list_del(&window->list);
				kfree(window);
				break;
			}
		}
		spin_unlock(&direct_window_list_lock);
		break;
	default:
		err = NOTIFY_DONE;
		break;
	}
	return err;
}

static struct notifier_block iommu_reconfig_nb = {
	.notifier_call = iommu_reconfig_notifier,
};

/* These are called very early. */
void iommu_init_early_pSeries(void)
{
	if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
		return;

	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
		if (!disable_ddw)
			pseries_pci_controller_ops.iommu_bypass_supported =
				iommu_bypass_supported_pSeriesLP;
	} else {
		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
	}

	of_reconfig_notifier_register(&iommu_reconfig_nb);
	register_memory_notifier(&iommu_mem_nb);

	set_pci_dma_ops(&dma_iommu_ops);
}

static int __init disable_multitce(char *str)
{
	if (strcmp(str, "off") == 0 &&
	    firmware_has_feature(FW_FEATURE_LPAR) &&
	    firmware_has_feature(FW_FEATURE_MULTITCE)) {
		printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
		powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
	}
	return 1;
}

__setup("multitce=", disable_multitce);

static int tce_iommu_bus_notifier(struct notifier_block *nb,
		unsigned long action, void *data)
{
	struct device *dev = data;

	switch (action) {
	case BUS_NOTIFY_DEL_DEVICE:
		iommu_del_device(dev);
		return 0;
	default:
		return 0;
	}
}

static struct notifier_block tce_iommu_bus_nb = {
	.notifier_call = tce_iommu_bus_notifier,
};

static int __init tce_iommu_bus_notifier_init(void)
{
	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
	return 0;
}
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);