linux/arch/x86/kernel/pci-gart_64.c
/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows PCI devices that only support 32bit addresses to be used on
 * systems with more than 4GB of memory.
 *
 * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU General Public License v2 only.
 */

#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <linux/bitmap.h>
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <linux/sysdev.h>
#include <linux/io.h>
#include <linux/gfp.h>
#include <asm/atomic.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/cacheflush.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/amd_nb.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>

static unsigned long iommu_bus_base;    /* GART remapping area (physical) */
static unsigned long iommu_size;        /* size of remapping area bytes */
static unsigned long iommu_pages;       /* .. and in pages */

static u32 *iommu_gatt_base;            /* Remapping table */

static dma_addr_t bad_dma_addr;

/*
 * If this is disabled the IOMMU will use an optimized flushing strategy
 * of flushing only when a mapping is reused. With it true the GART is
 * flushed for every mapping. The problem is that doing the lazy flush
 * seems to trigger bugs with some popular PCI cards, in particular
 * 3ware (but it has also been seen with Qlogic at least).
 */
static int iommu_fullflush = 1;

/* Allocation bitmap for the remapping area: */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
/* Guarded by iommu_bitmap_lock: */
static unsigned long *iommu_gart_bitmap;

static u32 gart_unmapped_entry;

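/*
 * GART page table entry layout, as implied by the encode/decode macros
 * below: bits 12-31 hold physical address bits 12-31, bits 4-11 hold
 * physical address bits 32-39 (for a 40-bit address), bit 0 is the valid
 * flag and bit 1 the coherent flag.
 */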
#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
        (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))

#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
static bool need_flush;         /* global flush state. set for each gart wrap */

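/*
 * Allocate a range of IOMMU pages with a next-fit search over the
 * allocation bitmap, starting at next_bit.  When the search wraps back
 * to the start of the aperture (or iommu_fullflush is set), need_flush
 * is set so that the next flush_gart() flushes the GART TLB before
 * remapped space is reused.
 */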
static unsigned long alloc_iommu(struct device *dev, int size,
                                 unsigned long align_mask)
{
        unsigned long offset, flags;
        unsigned long boundary_size;
        unsigned long base_index;

        base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
                           PAGE_SIZE) >> PAGE_SHIFT;
        boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1,
                              PAGE_SIZE) >> PAGE_SHIFT;

        spin_lock_irqsave(&iommu_bitmap_lock, flags);
        offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
                                  size, base_index, boundary_size, align_mask);
        if (offset == -1) {
                need_flush = true;
                offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
                                          size, base_index, boundary_size,
                                          align_mask);
        }
        if (offset != -1) {
                next_bit = offset+size;
                if (next_bit >= iommu_pages) {
                        next_bit = 0;
                        need_flush = true;
                }
        }
        if (iommu_fullflush)
                need_flush = true;
        spin_unlock_irqrestore(&iommu_bitmap_lock, flags);

        return offset;
}

static void free_iommu(unsigned long offset, int size)
{
        unsigned long flags;

        spin_lock_irqsave(&iommu_bitmap_lock, flags);
        bitmap_clear(iommu_gart_bitmap, offset, size);
        if (offset >= next_bit)
                next_bit = offset + size;
        spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

/*
 * Use global flush state to avoid races with multiple flushers.
 */
static void flush_gart(void)
{
        unsigned long flags;

        spin_lock_irqsave(&iommu_bitmap_lock, flags);
        if (need_flush) {
                amd_flush_garts();
                need_flush = false;
        }
        spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

#ifdef CONFIG_IOMMU_LEAK
/* Debugging aid for drivers that don't free their IOMMU tables */
static int leak_trace;
static int iommu_leak_pages = 20;

static void dump_leak(void)
{
        static int dump;

        if (dump)
                return;
        dump = 1;

        show_stack(NULL, NULL);
        debug_dma_dump_mappings(NULL);
}
#endif

static void iommu_full(struct device *dev, size_t size, int dir)
{
        /*
         * Ran out of IOMMU space for this operation. This is very bad.
         * Unfortunately the drivers cannot handle this operation properly.
         * Return some non-mapped, prereserved space in the aperture and
         * let the Northbridge deal with it. This will result in garbage
         * in the IO operation. When the size exceeds the prereserved space,
         * memory corruption will occur or random memory will be DMAed
         * out. Hopefully no network devices use single mappings that big.
         */

        dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);

        if (size > PAGE_SIZE*EMERGENCY_PAGES) {
                if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
                        panic("PCI-DMA: Memory would be corrupted\n");
                if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
                        panic(KERN_ERR
                                "PCI-DMA: Random memory would be DMAed\n");
        }
#ifdef CONFIG_IOMMU_LEAK
        dump_leak();
#endif
}

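/*
 * need_iommu() honours the global force_iommu setting; nonforced_iommu()
 * only checks whether the device can reach the address directly.
 */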
static inline int
need_iommu(struct device *dev, unsigned long addr, size_t size)
{
        return force_iommu || !dma_capable(dev, addr, size);
}

static inline int
nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
        return !dma_capable(dev, addr, size);
}

/*
 * Map a single contiguous physical area into the IOMMU.
 * The caller needs to check if the IOMMU is needed and flush.
 */
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
                                size_t size, int dir, unsigned long align_mask)
{
        unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
        unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
        int i;

        if (iommu_page == -1) {
                if (!nonforced_iommu(dev, phys_mem, size))
                        return phys_mem;
                if (panic_on_overflow)
                        panic("dma_map_area overflow %lu bytes\n", size);
                iommu_full(dev, size, dir);
                return bad_dma_addr;
        }

        for (i = 0; i < npages; i++) {
                iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
                phys_mem += PAGE_SIZE;
        }
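        /*
         * phys_mem was advanced in whole-page steps above, so its low
         * bits still hold the original offset within the first page.
         */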
        return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}

/* Map a single area into the IOMMU */
static dma_addr_t gart_map_page(struct device *dev, struct page *page,
                                unsigned long offset, size_t size,
                                enum dma_data_direction dir,
                                struct dma_attrs *attrs)
{
        unsigned long bus;
        phys_addr_t paddr = page_to_phys(page) + offset;

        if (!dev)
                dev = &x86_dma_fallback_dev;

        if (!need_iommu(dev, paddr, size))
                return paddr;

        bus = dma_map_area(dev, paddr, size, dir, 0);
        flush_gart();

        return bus;
}

/*
 * Free a DMA mapping.
 */
static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
                            size_t size, enum dma_data_direction dir,
                            struct dma_attrs *attrs)
{
        unsigned long iommu_page;
        int npages;
        int i;

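        /*
         * Addresses below the reserved emergency pages or outside the
         * remapping window were never remapped by the GART (e.g. they
         * were returned untranslated by gart_map_page()); nothing to free.
         */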
        if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
            dma_addr >= iommu_bus_base + iommu_size)
                return;

        iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
        npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
        for (i = 0; i < npages; i++) {
                iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
        }
        free_iommu(iommu_page, npages);
}

/*
 * Wrapper for pci_unmap_single working with scatterlists.
 */
static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
                          enum dma_data_direction dir, struct dma_attrs *attrs)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i) {
                if (!s->dma_length || !s->length)
                        break;
                gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
        }
}

/* Fallback for dma_map_sg in case of overflow */
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
                               int nents, int dir)
{
        struct scatterlist *s;
        int i;

#ifdef CONFIG_IOMMU_DEBUG
        pr_debug("dma_map_sg overflow\n");
#endif

        for_each_sg(sg, s, nents, i) {
                unsigned long addr = sg_phys(s);

                if (nonforced_iommu(dev, addr, s->length)) {
                        addr = dma_map_area(dev, addr, s->length, dir, 0);
                        if (addr == bad_dma_addr) {
                                if (i > 0)
                                        gart_unmap_sg(dev, sg, i, dir, NULL);
                                nents = 0;
                                sg[0].dma_length = 0;
                                break;
                        }
                }
                s->dma_address = addr;
                s->dma_length = s->length;
        }
        flush_gart();

        return nents;
}

/* Map multiple scatterlist entries contiguously into the first entry. */
static int __dma_map_cont(struct device *dev, struct scatterlist *start,
                          int nelems, struct scatterlist *sout,
                          unsigned long pages)
{
        unsigned long iommu_start = alloc_iommu(dev, pages, 0);
        unsigned long iommu_page = iommu_start;
        struct scatterlist *s;
        int i;

        if (iommu_start == -1)
                return -1;

        for_each_sg(start, s, nelems, i) {
                unsigned long pages, addr;
                unsigned long phys_addr = s->dma_address;

                BUG_ON(s != start && s->offset);
                if (s == start) {
                        sout->dma_address = iommu_bus_base;
                        sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
                        sout->dma_length = s->length;
                } else {
                        sout->dma_length += s->length;
                }

                addr = phys_addr;
                pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
                while (pages--) {
                        iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
                        addr += PAGE_SIZE;
                        iommu_page++;
                }
        }
        BUG_ON(iommu_page - iommu_start != pages);

        return 0;
}

static inline int
dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
             struct scatterlist *sout, unsigned long pages, int need)
{
        if (!need) {
                BUG_ON(nelems != 1);
                sout->dma_address = start->dma_address;
                sout->dma_length = start->length;
                return 0;
        }
        return __dma_map_cont(dev, start, nelems, sout, pages);
}

/*
 * DMA map all entries in a scatterlist.
 * Merge chunks that have page-aligned sizes into a contiguous mapping.
 */
static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                       enum dma_data_direction dir, struct dma_attrs *attrs)
{
        struct scatterlist *s, *ps, *start_sg, *sgmap;
        int need = 0, nextneed, i, out, start;
        unsigned long pages = 0;
        unsigned int seg_size;
        unsigned int max_seg_size;

        if (nents == 0)
                return 0;

        if (!dev)
                dev = &x86_dma_fallback_dev;

        out             = 0;
        start           = 0;
        start_sg        = sg;
        sgmap           = sg;
        seg_size        = 0;
        max_seg_size    = dma_get_max_seg_size(dev);
        ps              = NULL; /* shut up gcc */

        for_each_sg(sg, s, nents, i) {
                dma_addr_t addr = sg_phys(s);

                s->dma_address = addr;
                BUG_ON(s->length == 0);

                nextneed = need_iommu(dev, addr, s->length);

                /* Handle the previous not yet processed entries */
                if (i > start) {
                        /*
                         * Can only merge when the last chunk ends on a
                         * page boundary and the new one doesn't have an
                         * offset.
                         */
                        if (!iommu_merge || !nextneed || !need || s->offset ||
                            (s->length + seg_size > max_seg_size) ||
                            (ps->offset + ps->length) % PAGE_SIZE) {
                                if (dma_map_cont(dev, start_sg, i - start,
                                                 sgmap, pages, need) < 0)
                                        goto error;
                                out++;

                                seg_size        = 0;
                                sgmap           = sg_next(sgmap);
                                pages           = 0;
                                start           = i;
                                start_sg        = s;
                        }
                }

                seg_size += s->length;
                need = nextneed;
                pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE);
                ps = s;
        }
        if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
                goto error;
        out++;
        flush_gart();
        if (out < nents) {
                sgmap = sg_next(sgmap);
                sgmap->dma_length = 0;
        }
        return out;

error:
        flush_gart();
        gart_unmap_sg(dev, sg, out, dir, NULL);

        /* When it was forced or merged try again in a dumb way */
        if (force_iommu || iommu_merge) {
                out = dma_map_sg_nonforce(dev, sg, nents, dir);
                if (out > 0)
                        return out;
        }
        if (panic_on_overflow)
                panic("dma_map_sg: overflow on %lu pages\n", pages);

        iommu_full(dev, pages << PAGE_SHIFT, dir);
        for_each_sg(sg, s, nents, i)
                s->dma_address = bad_dma_addr;
        return 0;
}

/* allocate and map a coherent mapping */
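/*
 * When force_iommu is set (and the caller did not ask for GFP_DMA memory),
 * allocate pages and map them through the GART, using align_mask so the
 * aperture slot is aligned to the power-of-two allocation size in pages;
 * otherwise fall back to dma_generic_alloc_coherent().
 */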
static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
                    gfp_t flag)
{
        dma_addr_t paddr;
        unsigned long align_mask;
        struct page *page;

        if (force_iommu && !(flag & GFP_DMA)) {
                flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
                page = alloc_pages(flag | __GFP_ZERO, get_order(size));
                if (!page)
                        return NULL;

                align_mask = (1UL << get_order(size)) - 1;
                paddr = dma_map_area(dev, page_to_phys(page), size,
                                     DMA_BIDIRECTIONAL, align_mask);

                flush_gart();
                if (paddr != bad_dma_addr) {
                        *dma_addr = paddr;
                        return page_address(page);
                }
                __free_pages(page, get_order(size));
        } else
                return dma_generic_alloc_coherent(dev, size, dma_addr, flag);

        return NULL;
}

/* free a coherent mapping */
static void
gart_free_coherent(struct device *dev, size_t size, void *vaddr,
                   dma_addr_t dma_addr)
{
        gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
        free_pages((unsigned long)vaddr, get_order(size));
}

static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
        return (dma_addr == bad_dma_addr);
}

static int no_agp;

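/*
 * Decide how much of the aperture to use for the IOMMU: the whole
 * aperture by default, or half of it when the AGP driver also needs the
 * aperture; warn if the result is smaller than 64 MB.
 */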
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
        unsigned long a;

        if (!iommu_size) {
                iommu_size = aper_size;
                if (!no_agp)
                        iommu_size /= 2;
        }

        a = aper + iommu_size;
        iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;

        if (iommu_size < 64*1024*1024) {
                pr_warning(
                        "PCI-DMA: Warning: Small IOMMU %luMB."
                        " Consider increasing the AGP aperture in BIOS\n",
                                iommu_size >> 20);
        }

        return iommu_size;
}

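/*
 * Read the aperture base and size from the northbridge: the base register
 * holds the base in 32 MB units (hence the shift by 25), and bits 1-3 of
 * the aperture control register give the order of a 32 MB minimum size.
 * A zero size or a range reaching beyond 4 GB is treated as invalid.
 */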
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
        unsigned aper_size = 0, aper_base_32, aper_order;
        u64 aper_base;

        pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
        pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
        aper_order = (aper_order >> 1) & 7;

        aper_base = aper_base_32 & 0x7fff;
        aper_base <<= 25;

        aper_size = (32 * 1024 * 1024) << aper_order;
        if (aper_base + aper_size > 0x100000000UL || !aper_size)
                aper_base = 0;

        *size = aper_size;
        return aper_base;
}

static void enable_gart_translations(void)
{
        int i;

        if (!amd_nb_has_feature(AMD_NB_GART))
                return;

        for (i = 0; i < amd_nb_num(); i++) {
                struct pci_dev *dev = node_to_amd_nb(i)->misc;

                enable_gart_translation(dev, __pa(agp_gatt_table));
        }

        /* Flush the GART-TLB to remove stale entries */
        amd_flush_garts();
}

/*
 * If fix_up_north_bridges is set, the north bridges have to be fixed up on
 * resume in the same way as they are handled in gart_iommu_hole_init().
 */
static bool fix_up_north_bridges;
static u32 aperture_order;
static u32 aperture_alloc;

void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
{
        fix_up_north_bridges = true;
        aperture_order = aper_order;
        aperture_alloc = aper_alloc;
}

static void gart_fixup_northbridges(struct sys_device *dev)
{
        int i;

        if (!fix_up_north_bridges)
                return;

        if (!amd_nb_has_feature(AMD_NB_GART))
                return;

        pr_info("PCI-DMA: Restoring GART aperture settings\n");

        for (i = 0; i < amd_nb_num(); i++) {
                struct pci_dev *dev = node_to_amd_nb(i)->misc;

                /*
                 * Don't enable translations just yet.  That is the next
                 * step.  Restore the pre-suspend aperture settings.
                 */
                gart_set_size_and_enable(dev, aperture_order);
                pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
        }
}

static int gart_resume(struct sys_device *dev)
{
        pr_info("PCI-DMA: Resuming GART IOMMU\n");

        gart_fixup_northbridges(dev);

        enable_gart_translations();

        return 0;
}

static int gart_suspend(struct sys_device *dev, pm_message_t state)
{
        return 0;
}

static struct sysdev_class gart_sysdev_class = {
        .name           = "gart",
        .suspend        = gart_suspend,
        .resume         = gart_resume,

};

static struct sys_device device_gart = {
        .cls            = &gart_sysdev_class,
};

/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
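/*
 * Every northbridge must report the same aperture base and size;
 * otherwise (or if no aperture is configured) take the nommu path below
 * and fall back to iommu=soft.
 */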
static __init int init_amd_gatt(struct agp_kern_info *info)
{
        unsigned aper_size, gatt_size, new_aper_size;
        unsigned aper_base, new_aper_base;
        struct pci_dev *dev;
        void *gatt;
        int i, error;

        pr_info("PCI-DMA: Disabling AGP.\n");

        aper_size = aper_base = info->aper_size = 0;
        dev = NULL;
        for (i = 0; i < amd_nb_num(); i++) {
                dev = node_to_amd_nb(i)->misc;
                new_aper_base = read_aperture(dev, &new_aper_size);
                if (!new_aper_base)
                        goto nommu;

                if (!aper_base) {
                        aper_size = new_aper_size;
                        aper_base = new_aper_base;
                }
                if (aper_size != new_aper_size || aper_base != new_aper_base)
                        goto nommu;
        }
        if (!aper_base)
                goto nommu;

        info->aper_base = aper_base;
        info->aper_size = aper_size >> 20;

        gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
        gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                        get_order(gatt_size));
        if (!gatt)
                panic("Cannot allocate GATT table");
        if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
                panic("Could not set GART PTEs to uncacheable pages");

        agp_gatt_table = gatt;

        error = sysdev_class_register(&gart_sysdev_class);
        if (!error)
                error = sysdev_register(&device_gart);
        if (error)
                panic("Could not register gart_sysdev -- "
                      "would corrupt data on next suspend");

        flush_gart();

        pr_info("PCI-DMA: aperture base @ %x size %u KB\n",
               aper_base, aper_size>>10);

        return 0;

 nommu:
        /* Should not happen anymore */
        pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
               "falling back to iommu=soft.\n");
        return -1;
}

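/* DMA operations installed as the global dma_ops by gart_iommu_init(). */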
static struct dma_map_ops gart_dma_ops = {
        .map_sg                         = gart_map_sg,
        .unmap_sg                       = gart_unmap_sg,
        .map_page                       = gart_map_page,
        .unmap_page                     = gart_unmap_page,
        .alloc_coherent                 = gart_alloc_coherent,
        .free_coherent                  = gart_free_coherent,
        .mapping_error                  = gart_mapping_error,
};

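/*
 * Disable GART translation on each northbridge by clearing the GARTEN
 * bit in the aperture control register (unless AGP still needs it).
 */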
static void gart_iommu_shutdown(void)
{
        struct pci_dev *dev;
        int i;

        /* don't shut it down if there is AGP installed */
        if (!no_agp)
                return;

        if (!amd_nb_has_feature(AMD_NB_GART))
                return;

        for (i = 0; i < amd_nb_num(); i++) {
                u32 ctl;

                dev = node_to_amd_nb(i)->misc;
                pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);

                ctl &= ~GARTEN;

                pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
        }
}

int __init gart_iommu_init(void)
{
        struct agp_kern_info info;
        unsigned long iommu_start;
        unsigned long aper_base, aper_size;
        unsigned long start_pfn, end_pfn;
        unsigned long scratch;
        long i;

        if (!amd_nb_has_feature(AMD_NB_GART))
                return 0;

#ifndef CONFIG_AGP_AMD64
        no_agp = 1;
#else
        /* Makefile puts PCI initialization via subsys_initcall first. */
        /* Add other AMD AGP bridge drivers here */
        no_agp = no_agp ||
                (agp_amd64_init() < 0) ||
                (agp_copy_info(agp_bridge, &info) < 0);
#endif

        if (no_iommu ||
            (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
            !gart_iommu_aperture ||
            (no_agp && init_amd_gatt(&info) < 0)) {
                if (max_pfn > MAX_DMA32_PFN) {
                        pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
                        pr_warning("falling back to iommu=soft.\n");
                }
                return 0;
        }

        /* need to map that range */
        aper_size       = info.aper_size << 20;
        aper_base       = info.aper_base;
        end_pfn         = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);

        if (end_pfn > max_low_pfn_mapped) {
                start_pfn = (aper_base>>PAGE_SHIFT);
                init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
        }

        pr_info("PCI-DMA: using GART IOMMU.\n");
        iommu_size = check_iommu_size(info.aper_base, aper_size);
        iommu_pages = iommu_size >> PAGE_SHIFT;

        iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                                      get_order(iommu_pages/8));
        if (!iommu_gart_bitmap)
                panic("Cannot allocate iommu bitmap\n");

#ifdef CONFIG_IOMMU_LEAK
        if (leak_trace) {
                int ret;

                ret = dma_debug_resize_entries(iommu_pages);
                if (ret)
                        pr_debug("PCI-DMA: Cannot trace all the entries\n");
        }
#endif

        /*
         * Out of IOMMU space handling.
         * Reserve some invalid pages at the beginning of the GART.
         */
        bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

        pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
               iommu_size >> 20);

        agp_memory_reserved     = iommu_size;
        iommu_start             = aper_size - iommu_size;
        iommu_bus_base          = info.aper_base + iommu_start;
        bad_dma_addr            = iommu_bus_base;
        iommu_gatt_base         = agp_gatt_table + (iommu_start>>PAGE_SHIFT);

        /*
         * Unmap the IOMMU part of the GART. The alias of the page is
         * always mapped with cache enabled and there is no full cache
         * coherency across the GART remapping. The unmapping avoids
         * automatic prefetches from the CPU allocating cache lines in
         * there. All CPU accesses are done via the direct mapping to
         * the backing memory. The GART address is only used by PCI
         * devices.
         */
        set_memory_np((unsigned long)__va(iommu_bus_base),
                                iommu_size >> PAGE_SHIFT);
        /*
         * Tricky. The GART table remaps the physical memory range,
         * so the CPU won't notice potential aliases, and if the memory
         * is remapped to UC later on, we might surprise the PCI devices
         * with a stray writeout of a cache line. So play it safe and
         * do an explicit, full-scale wbinvd() _after_ having marked all
         * the pages as Not-Present:
         */
        wbinvd();

        /*
         * Now all caches are flushed and we can safely enable
         * GART hardware.  Doing it early leaves the possibility
         * of stale cache entries that can lead to GART PTE
         * errors.
         */
        enable_gart_translations();

        /*
         * Try to work around a bug (thanks to BenH):
         * Set unmapped entries to a scratch page instead of 0.
         * Any prefetches that hit unmapped entries won't cause a bus abort
         * then. (A P2P bridge may be prefetching on DMA reads.)
         */
        scratch = get_zeroed_page(GFP_KERNEL);
        if (!scratch)
                panic("Cannot allocate iommu scratch page");
        gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
        for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
                iommu_gatt_base[i] = gart_unmapped_entry;

        flush_gart();
        dma_ops = &gart_dma_ops;
        x86_platform.iommu_shutdown = gart_iommu_shutdown;
        swiotlb = 0;

        return 0;
}

void __init gart_parse_options(char *p)
{
        int arg;

#ifdef CONFIG_IOMMU_LEAK
        if (!strncmp(p, "leak", 4)) {
                leak_trace = 1;
                p += 4;
                if (*p == '=')
                        ++p;
                if (isdigit(*p) && get_option(&p, &arg))
                        iommu_leak_pages = arg;
        }
#endif
        if (isdigit(*p) && get_option(&p, &arg))
                iommu_size = arg;
        if (!strncmp(p, "fullflush", 9))
                iommu_fullflush = 1;
        if (!strncmp(p, "nofullflush", 11))
                iommu_fullflush = 0;
        if (!strncmp(p, "noagp", 5))
                no_agp = 1;
        if (!strncmp(p, "noaperture", 10))
                fix_aperture = 0;
        /* duplicated from pci-dma.c */
        if (!strncmp(p, "force", 5))
                gart_iommu_aperture_allowed = 1;
        if (!strncmp(p, "allowed", 7))
                gart_iommu_aperture_allowed = 1;
        if (!strncmp(p, "memaper", 7)) {
                fallback_aper_force = 1;
                p += 7;
                if (*p == '=') {
                        ++p;
                        if (get_option(&p, &arg))
                                fallback_aper_order = arg;
                }
        }
}
IOMMU_INIT_POST(gart_iommu_hole_init);