linux/arch/arm/mm/dma-mapping.c
<<
>>
Prefs
   1/*
   2 *  linux/arch/arm/mm/dma-mapping.c
   3 *
   4 *  Copyright (C) 2000-2004 Russell King
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 *
  10 *  DMA uncached mapping support.
  11 */
  12#include <linux/module.h>
  13#include <linux/mm.h>
  14#include <linux/gfp.h>
  15#include <linux/errno.h>
  16#include <linux/list.h>
  17#include <linux/init.h>
  18#include <linux/device.h>
  19#include <linux/dma-mapping.h>
  20#include <linux/highmem.h>
  21
  22#include <asm/memory.h>
  23#include <asm/highmem.h>
  24#include <asm/cacheflush.h>
  25#include <asm/tlbflush.h>
  26#include <asm/sizes.h>
  27
  28static u64 get_coherent_dma_mask(struct device *dev)
  29{
  30        u64 mask = ISA_DMA_THRESHOLD;
  31
  32        if (dev) {
  33                mask = dev->coherent_dma_mask;
  34
  35                /*
  36                 * Sanity check the DMA mask - it must be non-zero, and
  37                 * must be able to be satisfied by a DMA allocation.
  38                 */
  39                if (mask == 0) {
  40                        dev_warn(dev, "coherent DMA mask is unset\n");
  41                        return 0;
  42                }
  43
  44                if ((~mask) & ISA_DMA_THRESHOLD) {
  45                        dev_warn(dev, "coherent DMA mask %#llx is smaller "
  46                                 "than system GFP_DMA mask %#llx\n",
  47                                 mask, (unsigned long long)ISA_DMA_THRESHOLD);
  48                        return 0;
  49                }
  50        }
  51
  52        return mask;
  53}
  54
  55/*
  56 * Allocate a DMA buffer for 'dev' of size 'size' using the
  57 * specified gfp mask.  Note that 'size' must be page aligned.
  58 */
  59static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
  60{
  61        unsigned long order = get_order(size);
  62        struct page *page, *p, *e;
  63        void *ptr;
  64        u64 mask = get_coherent_dma_mask(dev);
  65
  66#ifdef CONFIG_DMA_API_DEBUG
  67        u64 limit = (mask + 1) & ~mask;
  68        if (limit && size >= limit) {
  69                dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
  70                        size, mask);
  71                return NULL;
  72        }
  73#endif
  74
  75        if (!mask)
  76                return NULL;
  77
  78        if (mask < 0xffffffffULL)
  79                gfp |= GFP_DMA;
  80
  81        page = alloc_pages(gfp, order);
  82        if (!page)
  83                return NULL;
  84
  85        /*
  86         * Now split the huge page and free the excess pages
  87         */
  88        split_page(page, order);
  89        for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
  90                __free_page(p);
  91
  92        /*
  93         * Ensure that the allocated pages are zeroed, and that any data
  94         * lurking in the kernel direct-mapped region is invalidated.
  95         */
  96        ptr = page_address(page);
  97        memset(ptr, 0, size);
  98        dmac_flush_range(ptr, ptr + size);
  99        outer_flush_range(__pa(ptr), __pa(ptr) + size);
 100
 101        return page;
 102}
 103
 104/*
 105 * Free a DMA buffer.  'size' must be page aligned.
 106 */
 107static void __dma_free_buffer(struct page *page, size_t size)
 108{
 109        struct page *e = page + (size >> PAGE_SHIFT);
 110
 111        while (page < e) {
 112                __free_page(page);
 113                page++;
 114        }
 115}
 116
 117#ifdef CONFIG_MMU
 118/* Sanity check size */
 119#if (CONSISTENT_DMA_SIZE % SZ_2M)
 120#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB"
 121#endif
 122
 123#define CONSISTENT_OFFSET(x)    (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
 124#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT)
 125#define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT)
 126
 127/*
 128 * These are the page tables (2MB each) covering uncached, DMA consistent allocations
 129 */
 130static pte_t *consistent_pte[NUM_CONSISTENT_PTES];
 131
 132#include "vmregion.h"
 133
 134static struct arm_vmregion_head consistent_head = {
 135        .vm_lock        = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
 136        .vm_list        = LIST_HEAD_INIT(consistent_head.vm_list),
 137        .vm_start       = CONSISTENT_BASE,
 138        .vm_end         = CONSISTENT_END,
 139};
 140
 141#ifdef CONFIG_HUGETLB_PAGE
 142#error ARM Coherent DMA allocator does not (yet) support huge TLB
 143#endif
 144
 145/*
 146 * Initialise the consistent memory allocation.
 147 */
 148static int __init consistent_init(void)
 149{
 150        int ret = 0;
 151        pgd_t *pgd;
 152        pmd_t *pmd;
 153        pte_t *pte;
 154        int i = 0;
 155        u32 base = CONSISTENT_BASE;
 156
 157        do {
 158                pgd = pgd_offset(&init_mm, base);
 159                pmd = pmd_alloc(&init_mm, pgd, base);
 160                if (!pmd) {
 161                        printk(KERN_ERR "%s: no pmd tables\n", __func__);
 162                        ret = -ENOMEM;
 163                        break;
 164                }
 165                WARN_ON(!pmd_none(*pmd));
 166
 167                pte = pte_alloc_kernel(pmd, base);
 168                if (!pte) {
 169                        printk(KERN_ERR "%s: no pte tables\n", __func__);
 170                        ret = -ENOMEM;
 171                        break;
 172                }
 173
 174                consistent_pte[i++] = pte;
 175                base += (1 << PGDIR_SHIFT);
 176        } while (base < CONSISTENT_END);
 177
 178        return ret;
 179}
 180
 181core_initcall(consistent_init);
 182
 183static void *
 184__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
 185{
 186        struct arm_vmregion *c;
 187        size_t align;
 188        int bit;
 189
 190        if (!consistent_pte[0]) {
 191                printk(KERN_ERR "%s: not initialised\n", __func__);
 192                dump_stack();
 193                return NULL;
 194        }
 195
 196        /*
 197         * Align the virtual region allocation - maximum alignment is
 198         * a section size, minimum is a page size.  This helps reduce
 199         * fragmentation of the DMA space, and also prevents allocations
 200         * smaller than a section from crossing a section boundary.
 201         */
 202        bit = fls(size - 1);
 203        if (bit > SECTION_SHIFT)
 204                bit = SECTION_SHIFT;
 205        align = 1 << bit;
 206
 207        /*
 208         * Allocate a virtual address in the consistent mapping region.
 209         */
 210        c = arm_vmregion_alloc(&consistent_head, align, size,
 211                            gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
 212        if (c) {
 213                pte_t *pte;
 214                int idx = CONSISTENT_PTE_INDEX(c->vm_start);
 215                u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
 216
 217                pte = consistent_pte[idx] + off;
 218                c->vm_pages = page;
 219
 220                do {
 221                        BUG_ON(!pte_none(*pte));
 222
 223                        set_pte_ext(pte, mk_pte(page, prot), 0);
 224                        page++;
 225                        pte++;
 226                        off++;
 227                        if (off >= PTRS_PER_PTE) {
 228                                off = 0;
 229                                pte = consistent_pte[++idx];
 230                        }
 231                } while (size -= PAGE_SIZE);
 232
 233                dsb();
 234
 235                return (void *)c->vm_start;
 236        }
 237        return NULL;
 238}
 239
 240static void __dma_free_remap(void *cpu_addr, size_t size)
 241{
 242        struct arm_vmregion *c;
 243        unsigned long addr;
 244        pte_t *ptep;
 245        int idx;
 246        u32 off;
 247
 248        c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
 249        if (!c) {
 250                printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
 251                       __func__, cpu_addr);
 252                dump_stack();
 253                return;
 254        }
 255
 256        if ((c->vm_end - c->vm_start) != size) {
 257                printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
 258                       __func__, c->vm_end - c->vm_start, size);
 259                dump_stack();
 260                size = c->vm_end - c->vm_start;
 261        }
 262
 263        idx = CONSISTENT_PTE_INDEX(c->vm_start);
 264        off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
 265        ptep = consistent_pte[idx] + off;
 266        addr = c->vm_start;
 267        do {
 268                pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
 269
 270                ptep++;
 271                addr += PAGE_SIZE;
 272                off++;
 273                if (off >= PTRS_PER_PTE) {
 274                        off = 0;
 275                        ptep = consistent_pte[++idx];
 276                }
 277
 278                if (pte_none(pte) || !pte_present(pte))
 279                        printk(KERN_CRIT "%s: bad page in kernel page table\n",
 280                               __func__);
 281        } while (size -= PAGE_SIZE);
 282
 283        flush_tlb_kernel_range(c->vm_start, c->vm_end);
 284
 285        arm_vmregion_free(&consistent_head, c);
 286}
 287
 288#else   /* !CONFIG_MMU */
 289
 290#define __dma_alloc_remap(page, size, gfp, prot)        page_address(page)
 291#define __dma_free_remap(addr, size)                    do { } while (0)
 292
 293#endif  /* CONFIG_MMU */
 294
 295static void *
 296__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 297            pgprot_t prot)
 298{
 299        struct page *page;
 300        void *addr;
 301
 302        *handle = ~0;
 303        size = PAGE_ALIGN(size);
 304
 305        page = __dma_alloc_buffer(dev, size, gfp);
 306        if (!page)
 307                return NULL;
 308
 309        if (!arch_is_coherent())
 310                addr = __dma_alloc_remap(page, size, gfp, prot);
 311        else
 312                addr = page_address(page);
 313
 314        if (addr)
 315                *handle = pfn_to_dma(dev, page_to_pfn(page));
 316
 317        return addr;
 318}
 319
 320/*
 321 * Allocate DMA-coherent memory space and return both the kernel remapped
 322 * virtual and bus address for that space.
 323 */
 324void *
 325dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 326{
 327        void *memory;
 328
 329        if (dma_alloc_from_coherent(dev, size, handle, &memory))
 330                return memory;
 331
 332        return __dma_alloc(dev, size, handle, gfp,
 333                           pgprot_dmacoherent(pgprot_kernel));
 334}
 335EXPORT_SYMBOL(dma_alloc_coherent);
 336
 337/*
 338 * Allocate a writecombining region, in much the same way as
 339 * dma_alloc_coherent above.
 340 */
 341void *
 342dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 343{
 344        return __dma_alloc(dev, size, handle, gfp,
 345                           pgprot_writecombine(pgprot_kernel));
 346}
 347EXPORT_SYMBOL(dma_alloc_writecombine);
 348
 349static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
 350                    void *cpu_addr, dma_addr_t dma_addr, size_t size)
 351{
 352        int ret = -ENXIO;
 353#ifdef CONFIG_MMU
 354        unsigned long user_size, kern_size;
 355        struct arm_vmregion *c;
 356
 357        user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 358
 359        c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
 360        if (c) {
 361                unsigned long off = vma->vm_pgoff;
 362
 363                kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
 364
 365                if (off < kern_size &&
 366                    user_size <= (kern_size - off)) {
 367                        ret = remap_pfn_range(vma, vma->vm_start,
 368                                              page_to_pfn(c->vm_pages) + off,
 369                                              user_size << PAGE_SHIFT,
 370                                              vma->vm_page_prot);
 371                }
 372        }
 373#endif  /* CONFIG_MMU */
 374
 375        return ret;
 376}
 377
 378int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 379                      void *cpu_addr, dma_addr_t dma_addr, size_t size)
 380{
 381        vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot);
 382        return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
 383}
 384EXPORT_SYMBOL(dma_mmap_coherent);
 385
 386int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
 387                          void *cpu_addr, dma_addr_t dma_addr, size_t size)
 388{
 389        vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 390        return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
 391}
 392EXPORT_SYMBOL(dma_mmap_writecombine);
 393
 394/*
 395 * free a page as defined by the above mapping.
 396 * Must not be called with IRQs disabled.
 397 */
 398void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
 399{
 400        WARN_ON(irqs_disabled());
 401
 402        if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 403                return;
 404
 405        size = PAGE_ALIGN(size);
 406
 407        if (!arch_is_coherent())
 408                __dma_free_remap(cpu_addr, size);
 409
 410        __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size);
 411}
 412EXPORT_SYMBOL(dma_free_coherent);
 413
 414/*
 415 * Make an area consistent for devices.
 416 * Note: Drivers should NOT use this function directly, as it will break
 417 * platforms with CONFIG_DMABOUNCE.
 418 * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 419 */
 420void ___dma_single_cpu_to_dev(const void *kaddr, size_t size,
 421        enum dma_data_direction dir)
 422{
 423        unsigned long paddr;
 424
 425        BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));
 426
 427        dmac_map_area(kaddr, size, dir);
 428
 429        paddr = __pa(kaddr);
 430        if (dir == DMA_FROM_DEVICE) {
 431                outer_inv_range(paddr, paddr + size);
 432        } else {
 433                outer_clean_range(paddr, paddr + size);
 434        }
 435        /* FIXME: non-speculating: flush on bidirectional mappings? */
 436}
 437EXPORT_SYMBOL(___dma_single_cpu_to_dev);
 438
 439void ___dma_single_dev_to_cpu(const void *kaddr, size_t size,
 440        enum dma_data_direction dir)
 441{
 442        BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));
 443
 444        /* FIXME: non-speculating: not required */
 445        /* don't bother invalidating if DMA to device */
 446        if (dir != DMA_TO_DEVICE) {
 447                unsigned long paddr = __pa(kaddr);
 448                outer_inv_range(paddr, paddr + size);
 449        }
 450
 451        dmac_unmap_area(kaddr, size, dir);
 452}
 453EXPORT_SYMBOL(___dma_single_dev_to_cpu);
 454
 455static void dma_cache_maint_page(struct page *page, unsigned long offset,
 456        size_t size, enum dma_data_direction dir,
 457        void (*op)(const void *, size_t, int))
 458{
 459        /*
 460         * A single sg entry may refer to multiple physically contiguous
 461         * pages.  But we still need to process highmem pages individually.
 462         * If highmem is not configured then the bulk of this loop gets
 463         * optimized out.
 464         */
 465        size_t left = size;
 466        do {
 467                size_t len = left;
 468                void *vaddr;
 469
 470                if (PageHighMem(page)) {
 471                        if (len + offset > PAGE_SIZE) {
 472                                if (offset >= PAGE_SIZE) {
 473                                        page += offset / PAGE_SIZE;
 474                                        offset %= PAGE_SIZE;
 475                                }
 476                                len = PAGE_SIZE - offset;
 477                        }
 478                        vaddr = kmap_high_get(page);
 479                        if (vaddr) {
 480                                vaddr += offset;
 481                                op(vaddr, len, dir);
 482                                kunmap_high(page);
 483                        } else if (cache_is_vipt()) {
 484                                /* unmapped pages might still be cached */
 485                                vaddr = kmap_atomic(page);
 486                                op(vaddr + offset, len, dir);
 487                                kunmap_atomic(vaddr);
 488                        }
 489                } else {
 490                        vaddr = page_address(page) + offset;
 491                        op(vaddr, len, dir);
 492                }
 493                offset = 0;
 494                page++;
 495                left -= len;
 496        } while (left);
 497}
 498
 499void ___dma_page_cpu_to_dev(struct page *page, unsigned long off,
 500        size_t size, enum dma_data_direction dir)
 501{
 502        unsigned long paddr;
 503
 504        dma_cache_maint_page(page, off, size, dir, dmac_map_area);
 505
 506        paddr = page_to_phys(page) + off;
 507        if (dir == DMA_FROM_DEVICE) {
 508                outer_inv_range(paddr, paddr + size);
 509        } else {
 510                outer_clean_range(paddr, paddr + size);
 511        }
 512        /* FIXME: non-speculating: flush on bidirectional mappings? */
 513}
 514EXPORT_SYMBOL(___dma_page_cpu_to_dev);
 515
 516void ___dma_page_dev_to_cpu(struct page *page, unsigned long off,
 517        size_t size, enum dma_data_direction dir)
 518{
 519        unsigned long paddr = page_to_phys(page) + off;
 520
 521        /* FIXME: non-speculating: not required */
 522        /* don't bother invalidating if DMA to device */
 523        if (dir != DMA_TO_DEVICE)
 524                outer_inv_range(paddr, paddr + size);
 525
 526        dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
 527
 528        /*
 529         * Mark the D-cache clean for this page to avoid extra flushing.
 530         */
 531        if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
 532                set_bit(PG_dcache_clean, &page->flags);
 533}
 534EXPORT_SYMBOL(___dma_page_dev_to_cpu);
 535
 536/**
 537 * dma_map_sg - map a set of SG buffers for streaming mode DMA
 538 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 539 * @sg: list of buffers
 540 * @nents: number of buffers to map
 541 * @dir: DMA transfer direction
 542 *
 543 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 544 * This is the scatter-gather version of the dma_map_single interface.
 545 * Here the scatter gather list elements are each tagged with the
 546 * appropriate dma address and length.  They are obtained via
 547 * sg_dma_{address,length}.
 548 *
 549 * Device ownership issues as mentioned for dma_map_single are the same
 550 * here.
 551 */
 552int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 553                enum dma_data_direction dir)
 554{
 555        struct scatterlist *s;
 556        int i, j;
 557
 558        BUG_ON(!valid_dma_direction(dir));
 559
 560        for_each_sg(sg, s, nents, i) {
 561                s->dma_address = __dma_map_page(dev, sg_page(s), s->offset,
 562                                                s->length, dir);
 563                if (dma_mapping_error(dev, s->dma_address))
 564                        goto bad_mapping;
 565        }
 566        debug_dma_map_sg(dev, sg, nents, nents, dir);
 567        return nents;
 568
 569 bad_mapping:
 570        for_each_sg(sg, s, i, j)
 571                __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
 572        return 0;
 573}
 574EXPORT_SYMBOL(dma_map_sg);
 575
 576/**
 577 * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
 578 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 579 * @sg: list of buffers
 580 * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
 581 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 582 *
 583 * Unmap a set of streaming mode DMA translations.  Again, CPU access
 584 * rules concerning calls here are the same as for dma_unmap_single().
 585 */
 586void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 587                enum dma_data_direction dir)
 588{
 589        struct scatterlist *s;
 590        int i;
 591
 592        debug_dma_unmap_sg(dev, sg, nents, dir);
 593
 594        for_each_sg(sg, s, nents, i)
 595                __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
 596}
 597EXPORT_SYMBOL(dma_unmap_sg);
 598
 599/**
 600 * dma_sync_sg_for_cpu
 601 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 602 * @sg: list of buffers
 603 * @nents: number of buffers to map (returned from dma_map_sg)
 604 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 605 */
 606void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 607                        int nents, enum dma_data_direction dir)
 608{
 609        struct scatterlist *s;
 610        int i;
 611
 612        for_each_sg(sg, s, nents, i) {
 613                if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0,
 614                                            sg_dma_len(s), dir))
 615                        continue;
 616
 617                __dma_page_dev_to_cpu(sg_page(s), s->offset,
 618                                      s->length, dir);
 619        }
 620
 621        debug_dma_sync_sg_for_cpu(dev, sg, nents, dir);
 622}
 623EXPORT_SYMBOL(dma_sync_sg_for_cpu);
 624
 625/**
 626 * dma_sync_sg_for_device
 627 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 628 * @sg: list of buffers
 629 * @nents: number of buffers to map (returned from dma_map_sg)
 630 * @dir: DMA transfer direction (same as was passed to dma_map_sg)
 631 */
 632void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 633                        int nents, enum dma_data_direction dir)
 634{
 635        struct scatterlist *s;
 636        int i;
 637
 638        for_each_sg(sg, s, nents, i) {
 639                if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0,
 640                                        sg_dma_len(s), dir))
 641                        continue;
 642
 643                __dma_page_cpu_to_dev(sg_page(s), s->offset,
 644                                      s->length, dir);
 645        }
 646
 647        debug_dma_sync_sg_for_device(dev, sg, nents, dir);
 648}
 649EXPORT_SYMBOL(dma_sync_sg_for_device);
 650
 651#define PREALLOC_DMA_DEBUG_ENTRIES      4096
 652
 653static int __init dma_debug_do_init(void)
 654{
 655        dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
 656        return 0;
 657}
 658fs_initcall(dma_debug_do_init);
 659