linux/arch/powerpc/platforms/pseries/iommu.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
   3 *
   4 * Rewrite, cleanup:
   5 *
   6 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
   7 * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
   8 *
   9 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
  10 *
  11 *
  12 * This program is free software; you can redistribute it and/or modify
  13 * it under the terms of the GNU General Public License as published by
  14 * the Free Software Foundation; either version 2 of the License, or
  15 * (at your option) any later version.
  16 *
  17 * This program is distributed in the hope that it will be useful,
  18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 * GNU General Public License for more details.
  21 *
  22 * You should have received a copy of the GNU General Public License
  23 * along with this program; if not, write to the Free Software
  24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  25 */
  26
  27#include <linux/init.h>
  28#include <linux/types.h>
  29#include <linux/slab.h>
  30#include <linux/mm.h>
  31#include <linux/spinlock.h>
  32#include <linux/string.h>
  33#include <linux/pci.h>
  34#include <linux/dma-mapping.h>
  35#include <asm/io.h>
  36#include <asm/prom.h>
  37#include <asm/rtas.h>
  38#include <asm/iommu.h>
  39#include <asm/pci-bridge.h>
  40#include <asm/machdep.h>
  41#include <asm/abs_addr.h>
  42#include <asm/pSeries_reconfig.h>
  43#include <asm/firmware.h>
  44#include <asm/tce.h>
  45#include <asm/ppc-pci.h>
  46#include <asm/udbg.h>
  47
  48#include "plpar_wrappers.h"
  49
  50#define DBG(fmt...)
  51
  52static void tce_build_pSeries(struct iommu_table *tbl, long index,
  53                              long npages, unsigned long uaddr,
  54                              enum dma_data_direction direction)
  55{
  56        u64 proto_tce;
  57        u64 *tcep;
  58        u64 rpn;
  59
  60        proto_tce = TCE_PCI_READ; // Read allowed
  61
  62        if (direction != DMA_TO_DEVICE)
  63                proto_tce |= TCE_PCI_WRITE;
  64
  65        tcep = ((u64 *)tbl->it_base) + index;
  66
  67        while (npages--) {
  68                /* can't move this out since we might cross LMB boundary */
  69                rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
  70                *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
  71
  72                uaddr += TCE_PAGE_SIZE;
  73                tcep++;
  74        }
  75}
  76
  77
  78static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
  79{
  80        u64 *tcep;
  81
  82        tcep = ((u64 *)tbl->it_base) + index;
  83
  84        while (npages--)
  85                *(tcep++) = 0;
  86}
  87
  88static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
  89{
  90        u64 *tcep;
  91
  92        tcep = ((u64 *)tbl->it_base) + index;
  93
  94        return *tcep;
  95}
  96
  97static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
  98                                long npages, unsigned long uaddr,
  99                                enum dma_data_direction direction)
 100{
 101        u64 rc;
 102        u64 proto_tce, tce;
 103        u64 rpn;
 104
 105        rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
 106        proto_tce = TCE_PCI_READ;
 107        if (direction != DMA_TO_DEVICE)
 108                proto_tce |= TCE_PCI_WRITE;
 109
 110        while (npages--) {
 111                tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
 112                rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
 113
 114                if (rc && printk_ratelimit()) {
 115                        printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
 116                        printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
 117                        printk("\ttcenum  = 0x%lx\n", (u64)tcenum);
 118                        printk("\ttce val = 0x%lx\n", tce );
 119                        show_stack(current, (unsigned long *)__get_SP());
 120                }
 121
 122                tcenum++;
 123                rpn++;
 124        }
 125}
 126
 127static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
 128
 129static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 130                                     long npages, unsigned long uaddr,
 131                                     enum dma_data_direction direction)
 132{
 133        u64 rc;
 134        u64 proto_tce;
 135        u64 *tcep;
 136        u64 rpn;
 137        long l, limit;
 138
 139        if (npages == 1)
 140                return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
 141                                           direction);
 142
 143        tcep = __get_cpu_var(tce_page);
 144
 145        /* This is safe to do since interrupts are off when we're called
 146         * from iommu_alloc{,_sg}()
 147         */
 148        if (!tcep) {
 149                tcep = (u64 *)__get_free_page(GFP_ATOMIC);
 150                /* If allocation fails, fall back to the loop implementation */
 151                if (!tcep)
 152                        return tce_build_pSeriesLP(tbl, tcenum, npages,
 153                                                   uaddr, direction);
 154                __get_cpu_var(tce_page) = tcep;
 155        }
 156
 157        rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
 158        proto_tce = TCE_PCI_READ;
 159        if (direction != DMA_TO_DEVICE)
 160                proto_tce |= TCE_PCI_WRITE;
 161
 162        /* We can map max one pageful of TCEs at a time */
 163        do {
 164                /*
 165                 * Set up the page with TCE data, looping through and setting
 166                 * the values.
 167                 */
 168                limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
 169
 170                for (l = 0; l < limit; l++) {
 171                        tcep[l] = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
 172                        rpn++;
 173                }
 174
 175                rc = plpar_tce_put_indirect((u64)tbl->it_index,
 176                                            (u64)tcenum << 12,
 177                                            (u64)virt_to_abs(tcep),
 178                                            limit);
 179
 180                npages -= limit;
 181                tcenum += limit;
 182        } while (npages > 0 && !rc);
 183
 184        if (rc && printk_ratelimit()) {
 185                printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
 186                printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
 187                printk("\tnpages  = 0x%lx\n", (u64)npages);
 188                printk("\ttce[0] val = 0x%lx\n", tcep[0]);
 189                show_stack(current, (unsigned long *)__get_SP());
 190        }
 191}
 192
 193static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
 194{
 195        u64 rc;
 196
 197        while (npages--) {
 198                rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
 199
 200                if (rc && printk_ratelimit()) {
 201                        printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
 202                        printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
 203                        printk("\ttcenum  = 0x%lx\n", (u64)tcenum);
 204                        show_stack(current, (unsigned long *)__get_SP());
 205                }
 206
 207                tcenum++;
 208        }
 209}
 210
 211
 212static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
 213{
 214        u64 rc;
 215
 216        rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
 217
 218        if (rc && printk_ratelimit()) {
 219                printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
 220                printk("\trc      = %ld\n", rc);
 221                printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
 222                printk("\tnpages  = 0x%lx\n", (u64)npages);
 223                show_stack(current, (unsigned long *)__get_SP());
 224        }
 225}
 226
 227static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
 228{
 229        u64 rc;
 230        unsigned long tce_ret;
 231
 232        rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);
 233
 234        if (rc && printk_ratelimit()) {
 235                printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%ld\n",
 236                        rc);
 237                printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
 238                printk("\ttcenum  = 0x%lx\n", (u64)tcenum);
 239                show_stack(current, (unsigned long *)__get_SP());
 240        }
 241
 242        return tce_ret;
 243}
 244
 245#ifdef CONFIG_PCI
 246static void iommu_table_setparms(struct pci_controller *phb,
 247                                 struct device_node *dn,
 248                                 struct iommu_table *tbl)
 249{
 250        struct device_node *node;
 251        const unsigned long *basep;
 252        const u32 *sizep;
 253
 254        node = (struct device_node *)phb->arch_data;
 255
 256        basep = of_get_property(node, "linux,tce-base", NULL);
 257        sizep = of_get_property(node, "linux,tce-size", NULL);
 258        if (basep == NULL || sizep == NULL) {
 259                printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
 260                                "missing tce entries !\n", dn->full_name);
 261                return;
 262        }
 263
 264        tbl->it_base = (unsigned long)__va(*basep);
 265
 266#ifndef CONFIG_CRASH_DUMP
 267        memset((void *)tbl->it_base, 0, *sizep);
 268#endif
 269
 270        tbl->it_busno = phb->bus->number;
 271
 272        /* Units of tce entries */
 273        tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT;
 274
 275        /* Test if we are going over 2GB of DMA space */
 276        if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
 277                udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
 278                panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
 279        }
 280
 281        phb->dma_window_base_cur += phb->dma_window_size;
 282
 283        /* Set the tce table size - measured in entries */
 284        tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT;
 285
 286        tbl->it_index = 0;
 287        tbl->it_blocksize = 16;
 288        tbl->it_type = TCE_PCI;
 289}
 290
 291/*
 292 * iommu_table_setparms_lpar
 293 *
 294 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
 295 */
 296static void iommu_table_setparms_lpar(struct pci_controller *phb,
 297                                      struct device_node *dn,
 298                                      struct iommu_table *tbl,
 299                                      const void *dma_window)
 300{
 301        unsigned long offset, size;
 302
 303        tbl->it_busno  = PCI_DN(dn)->bussubno;
 304        of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);
 305
 306        tbl->it_base   = 0;
 307        tbl->it_blocksize  = 16;
 308        tbl->it_type = TCE_PCI;
 309        tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
 310        tbl->it_size = size >> IOMMU_PAGE_SHIFT;
 311}
 312
 313static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 314{
 315        struct device_node *dn;
 316        struct iommu_table *tbl;
 317        struct device_node *isa_dn, *isa_dn_orig;
 318        struct device_node *tmp;
 319        struct pci_dn *pci;
 320        int children;
 321
 322        dn = pci_bus_to_OF_node(bus);
 323
 324        DBG("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name);
 325
 326        if (bus->self) {
 327                /* This is not a root bus, any setup will be done for the
 328                 * device-side of the bridge in iommu_dev_setup_pSeries().
 329                 */
 330                return;
 331        }
 332        pci = PCI_DN(dn);
 333
 334        /* Check if the ISA bus on the system is under
 335         * this PHB.
 336         */
 337        isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");
 338
 339        while (isa_dn && isa_dn != dn)
 340                isa_dn = isa_dn->parent;
 341
 342        if (isa_dn_orig)
 343                of_node_put(isa_dn_orig);
 344
 345        /* Count number of direct PCI children of the PHB. */
 346        for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
 347                children++;
 348
 349        DBG("Children: %d\n", children);
 350
 351        /* Calculate amount of DMA window per slot. Each window must be
 352         * a power of two (due to pci_alloc_consistent requirements).
 353         *
 354         * Keep 256MB aside for PHBs with ISA.
 355         */
 356
 357        if (!isa_dn) {
 358                /* No ISA/IDE - just set window size and return */
 359                pci->phb->dma_window_size = 0x80000000ul; /* To be divided */
 360
 361                while (pci->phb->dma_window_size * children > 0x80000000ul)
 362                        pci->phb->dma_window_size >>= 1;
 363                DBG("No ISA/IDE, window size is 0x%lx\n",
 364                        pci->phb->dma_window_size);
 365                pci->phb->dma_window_base_cur = 0;
 366
 367                return;
 368        }
 369
 370        /* If we have ISA, then we probably have an IDE
 371         * controller too. Allocate a 128MB table but
 372         * skip the first 128MB to avoid stepping on ISA
 373         * space.
 374         */
 375        pci->phb->dma_window_size = 0x8000000ul;
 376        pci->phb->dma_window_base_cur = 0x8000000ul;
 377
 378        tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
 379                           pci->phb->node);
 380
 381        iommu_table_setparms(pci->phb, dn, tbl);
 382        pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
 383
 384        /* Divide the rest (1.75GB) among the children */
 385        pci->phb->dma_window_size = 0x80000000ul;
 386        while (pci->phb->dma_window_size * children > 0x70000000ul)
 387                pci->phb->dma_window_size >>= 1;
 388
 389        DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size);
 390
 391}
 392
 393
 394static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 395{
 396        struct iommu_table *tbl;
 397        struct device_node *dn, *pdn;
 398        struct pci_dn *ppci;
 399        const void *dma_window = NULL;
 400
 401        dn = pci_bus_to_OF_node(bus);
 402
 403        DBG("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n", dn->full_name);
 404
 405        /* Find nearest ibm,dma-window, walking up the device tree */
 406        for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
 407                dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
 408                if (dma_window != NULL)
 409                        break;
 410        }
 411
 412        if (dma_window == NULL) {
 413                DBG("  no ibm,dma-window property !\n");
 414                return;
 415        }
 416
 417        ppci = PCI_DN(pdn);
 418
 419        DBG("  parent is %s, iommu_table: 0x%p\n",
 420            pdn->full_name, ppci->iommu_table);
 421
 422        if (!ppci->iommu_table) {
 423                /* Bussubno hasn't been copied yet.
 424                 * Do it now because iommu_table_setparms_lpar needs it.
 425                 */
 426
 427                ppci->bussubno = bus->number;
 428
 429                tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
 430                                   ppci->phb->node);
 431
 432                iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
 433
 434                ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
 435                DBG("  created table: %p\n", ppci->iommu_table);
 436        }
 437
 438        if (pdn != dn)
 439                PCI_DN(dn)->iommu_table = ppci->iommu_table;
 440}
 441
 442
 443static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
 444{
 445        struct device_node *dn;
 446        struct iommu_table *tbl;
 447
 448        DBG("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));
 449
 450        dn = dev->dev.archdata.of_node;
 451
 452        /* If we're the direct child of a root bus, then we need to allocate
 453         * an iommu table ourselves. The bus setup code should have setup
 454         * the window sizes already.
 455         */
 456        if (!dev->bus->self) {
 457                struct pci_controller *phb = PCI_DN(dn)->phb;
 458
 459                DBG(" --> first child, no bridge. Allocating iommu table.\n");
 460                tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
 461                                   phb->node);
 462                iommu_table_setparms(phb, dn, tbl);
 463                PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
 464                dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table;
 465                return;
 466        }
 467
 468        /* If this device is further down the bus tree, search upwards until
 469         * an already allocated iommu table is found and use that.
 470         */
 471
 472        while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL)
 473                dn = dn->parent;
 474
 475        if (dn && PCI_DN(dn))
 476                dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table;
 477        else
 478                printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
 479                       pci_name(dev));
 480}
 481
 482static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 483{
 484        struct device_node *pdn, *dn;
 485        struct iommu_table *tbl;
 486        const void *dma_window = NULL;
 487        struct pci_dn *pci;
 488
 489        DBG("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
 490
 491        /* dev setup for LPAR is a little tricky, since the device tree might
 492         * contain the dma-window properties per-device and not neccesarily
 493         * for the bus. So we need to search upwards in the tree until we
 494         * either hit a dma-window property, OR find a parent with a table
 495         * already allocated.
 496         */
 497        dn = pci_device_to_OF_node(dev);
 498        DBG("  node is %s\n", dn->full_name);
 499
 500        for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
 501             pdn = pdn->parent) {
 502                dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
 503                if (dma_window)
 504                        break;
 505        }
 506
 507        if (!pdn || !PCI_DN(pdn)) {
 508                printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
 509                       "no DMA window found for pci dev=%s dn=%s\n",
 510                                 pci_name(dev), dn? dn->full_name : "<null>");
 511                return;
 512        }
 513        DBG("  parent is %s\n", pdn->full_name);
 514
 515        /* Check for parent == NULL so we don't try to setup the empty EADS
 516         * slots on POWER4 machines.
 517         */
 518        if (dma_window == NULL || pdn->parent == NULL) {
 519                DBG("  no dma window for device, linking to parent\n");
 520                dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table;
 521                return;
 522        }
 523
 524        pci = PCI_DN(pdn);
 525        if (!pci->iommu_table) {
 526                /* iommu_table_setparms_lpar needs bussubno. */
 527                pci->bussubno = pci->phb->bus->number;
 528
 529                tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
 530                                   pci->phb->node);
 531
 532                iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
 533
 534                pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
 535                DBG("  created table: %p\n", pci->iommu_table);
 536        } else {
 537                DBG("  found DMA window, table: %p\n", pci->iommu_table);
 538        }
 539
 540        dev->dev.archdata.dma_data = pci->iommu_table;
 541}
 542#else  /* CONFIG_PCI */
 543#define pci_dma_bus_setup_pSeries       NULL
 544#define pci_dma_dev_setup_pSeries       NULL
 545#define pci_dma_bus_setup_pSeriesLP     NULL
 546#define pci_dma_dev_setup_pSeriesLP     NULL
 547#endif /* !CONFIG_PCI */
 548
 549static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
 550{
 551        int err = NOTIFY_OK;
 552        struct device_node *np = node;
 553        struct pci_dn *pci = PCI_DN(np);
 554
 555        switch (action) {
 556        case PSERIES_RECONFIG_REMOVE:
 557                if (pci && pci->iommu_table &&
 558                    of_get_property(np, "ibm,dma-window", NULL))
 559                        iommu_free_table(np);
 560                break;
 561        default:
 562                err = NOTIFY_DONE;
 563                break;
 564        }
 565        return err;
 566}
 567
 568static struct notifier_block iommu_reconfig_nb = {
 569        .notifier_call = iommu_reconfig_notifier,
 570};
 571
 572/* These are called very early. */
 573void iommu_init_early_pSeries(void)
 574{
 575        if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) {
 576                /* Direct I/O, IOMMU off */
 577                ppc_md.pci_dma_dev_setup = NULL;
 578                ppc_md.pci_dma_bus_setup = NULL;
 579                set_pci_dma_ops(&dma_direct_ops);
 580                return;
 581        }
 582
 583        if (firmware_has_feature(FW_FEATURE_LPAR)) {
 584                if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
 585                        ppc_md.tce_build = tce_buildmulti_pSeriesLP;
 586                        ppc_md.tce_free  = tce_freemulti_pSeriesLP;
 587                } else {
 588                        ppc_md.tce_build = tce_build_pSeriesLP;
 589                        ppc_md.tce_free  = tce_free_pSeriesLP;
 590                }
 591                ppc_md.tce_get   = tce_get_pSeriesLP;
 592                ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
 593                ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
 594        } else {
 595                ppc_md.tce_build = tce_build_pSeries;
 596                ppc_md.tce_free  = tce_free_pSeries;
 597                ppc_md.tce_get   = tce_get_pseries;
 598                ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeries;
 599                ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeries;
 600        }
 601
 602
 603        pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
 604
 605        set_pci_dma_ops(&dma_iommu_ops);
 606}
 607
 608