linux/arch/arc/mm/dma.c
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * DMA Coherent API Notes
 *
 * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
 * implemented by accessing it using a kernel virtual address, with
 * Cache bit off in the TLB entry.
 *
 * The default DMA address == Phy address which is 0x8000_0000 based.
 */
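
/*
 * Illustrative sketch (not part of this file): a driver would obtain such a
 * coherent buffer through the generic DMA API, which reaches arc_dma_alloc()
 * below via arc_dma_ops. The device pointer "mydev" is hypothetical.
 *
 *        dma_addr_t dma_handle;
 *        void *buf = dma_alloc_coherent(&mydev->dev, PAGE_SIZE, &dma_handle,
 *                                       GFP_KERNEL);
 *        if (!buf)
 *                return -ENOMEM;
 *        (CPU uses the uncached kvaddr @buf; device DMAs to @dma_handle)
 *        dma_free_coherent(&mydev->dev, PAGE_SIZE, buf, dma_handle);
 */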

#include <linux/dma-mapping.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>


static void *arc_dma_alloc(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
        unsigned long order = get_order(size);
        struct page *page;
        phys_addr_t paddr;
        void *kvaddr;
        int need_coh = 1, need_kvaddr = 0;

        page = alloc_pages(gfp, order);
        if (!page)
                return NULL;

        /*
         * IOC relies on all data (even coherent DMA data) being in cache,
         * thus allocate normal cached memory.
         *
         * The gains with IOC are two pronged:
         *   - For streaming data, it elides the need for cache maintenance,
         *     saving cycles in flush code and bus bandwidth, as otherwise all
         *     the lines of a buffer need to be flushed out to memory
         *   - For coherent data, reads/writes to buffers terminate early in
         *     cache (vs. always going to memory), thus are faster
         */
        if ((is_isa_arcv2() && ioc_enable) ||
            (attrs & DMA_ATTR_NON_CONSISTENT))
                need_coh = 0;

        /*
         * - A coherent buffer needs MMU mapping to enforce non-cacheability
         * - A highmem page needs a virtual handle (hence MMU mapping)
         *   independent of cacheability
         */
        if (PageHighMem(page) || need_coh)
                need_kvaddr = 1;

        /* This is the linear addr (0x8000_0000 based) */
        paddr = page_to_phys(page);

        *dma_handle = paddr;

        /* This is the kernel virtual address (0x7000_0000 based) */
        if (need_kvaddr) {
                kvaddr = ioremap_nocache(paddr, size);
                if (kvaddr == NULL) {
                        __free_pages(page, order);
                        return NULL;
                }
        } else {
                kvaddr = (void *)(u32)paddr;
        }

        /*
         * Evict any existing L1 and/or L2 lines for the backing page
         * in case it was used earlier as a normal "cached" page.
         * Yeah this bit us - STAR 9000898266
         *
         * Although the core does call flush_cache_vmap(), it gets the kvaddr
         * and hence can't be used to efficiently flush L1 and/or L2, which
         * need the paddr. Currently flush_cache_vmap() nukes the L1 cache
         * completely, which will be optimized as a separate commit.
         */
        if (need_coh)
                dma_cache_wback_inv(paddr, size);

        return kvaddr;
}

/*
 * Counterpart of arc_dma_alloc(): tear down the uncached MMU mapping, if one
 * was created, and return the pages to the allocator.
 */
static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
                dma_addr_t dma_handle, unsigned long attrs)
{
        phys_addr_t paddr = dma_handle;
        struct page *page = virt_to_page(paddr);
        int is_non_coh = 1;

        is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
                        (is_isa_arcv2() && ioc_enable);

        if (PageHighMem(page) || !is_non_coh)
                iounmap((void __force __iomem *)vaddr);

        __free_pages(page, get_order(size));
}

/*
 * Map a coherent buffer into userspace: like the kernel-side view, the user
 * mapping is made uncached (pgprot_noncached).
 */
static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
                        void *cpu_addr, dma_addr_t dma_addr, size_t size,
                        unsigned long attrs)
{
        unsigned long user_count = vma_pages(vma);
        unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        unsigned long pfn = __phys_to_pfn(dma_addr);
        unsigned long off = vma->vm_pgoff;
        int ret = -ENXIO;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
                return ret;

        if (off < count && user_count <= (count - off)) {
                ret = remap_pfn_range(vma, vma->vm_start,
                                      pfn + off,
                                      user_count << PAGE_SHIFT,
                                      vma->vm_page_prot);
        }

        return ret;
}
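
/*
 * Illustrative sketch (not part of this file): a driver's .mmap handler would
 * reach arc_dma_mmap() via the generic wrapper; "mydev", "buf", "dma_handle"
 * and "size" are hypothetical and assumed to come from a prior
 * dma_alloc_coherent().
 *
 *        static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
 *        {
 *                return dma_mmap_coherent(&mydev->dev, vma, buf, dma_handle,
 *                                         size);
 *        }
 */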

/*
 * streaming DMA Mapping API...
 * The CPU accesses the page via its normal (cached) paddr, so the buffer
 * needs to be explicitly made consistent before each use.
 */
static void _dma_cache_sync(phys_addr_t paddr, size_t size,
                enum dma_data_direction dir)
{
        switch (dir) {
        case DMA_FROM_DEVICE:
                dma_cache_inv(paddr, size);
                break;
        case DMA_TO_DEVICE:
                dma_cache_wback(paddr, size);
                break;
        case DMA_BIDIRECTIONAL:
                dma_cache_wback_inv(paddr, size);
                break;
        default:
                pr_err("Invalid DMA dir [%d] for OP @ %pa[p]\n", dir, &paddr);
        }
}
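
/*
 * Illustrative sketch (not part of this file): typical streaming use from a
 * driver; "mydev", "data" and "len" are hypothetical. A DMA_TO_DEVICE mapping
 * ends up in dma_cache_wback() above, a DMA_FROM_DEVICE one in dma_cache_inv().
 *
 *        dma_addr_t dma = dma_map_single(&mydev->dev, data, len,
 *                                        DMA_TO_DEVICE);
 *        if (dma_mapping_error(&mydev->dev, dma))
 *                return -ENOMEM;
 *        (hand @dma to the device and wait for it to finish)
 *        dma_unmap_single(&mydev->dev, dma, len, DMA_TO_DEVICE);
 */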

/*
 * arc_dma_map_page - map a portion of a page for streaming DMA
 *
 * Ensure that any data held in the cache is appropriately discarded
 * or written back.
 *
 * The device owns this memory once this call has completed.  The CPU
 * can regain ownership by calling dma_unmap_page().
 *
 * Note: while it takes struct page as arg, caller can "abuse" it to pass
 * a region larger than PAGE_SIZE, provided it is physically contiguous
 * and this still works correctly
 */
static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
                unsigned long offset, size_t size, enum dma_data_direction dir,
                unsigned long attrs)
{
        phys_addr_t paddr = page_to_phys(page) + offset;

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                _dma_cache_sync(paddr, size, dir);

        return paddr;
}

/*
 * arc_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
 *
 * After this call, reads by the CPU to the buffer are guaranteed to see
 * whatever the device wrote there.
 *
 * Note: historically this routine was not implemented for ARC
 */
static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle,
                               size_t size, enum dma_data_direction dir,
                               unsigned long attrs)
{
        phys_addr_t paddr = handle;

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                _dma_cache_sync(paddr, size, dir);
}
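
/*
 * Illustrative sketch (not part of this file): page-based variant of the
 * streaming API; "mydev" and "page" are hypothetical. The device owns the
 * region between map and unmap, so the CPU must not touch it in that window.
 *
 *        dma_addr_t dma = dma_map_page(&mydev->dev, page, 0, PAGE_SIZE,
 *                                      DMA_FROM_DEVICE);
 *        (device fills the page)
 *        dma_unmap_page(&mydev->dev, dma, PAGE_SIZE, DMA_FROM_DEVICE);
 *        (CPU reads are now guaranteed to see the device's writes)
 */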

static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg,
           int nents, enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *s;
        int i;

        /* Map each segment individually; its dma_address is just its paddr */
        for_each_sg(sg, s, nents, i)
                s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
                                               s->length, dir);

        return nents;
}

static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
                             int nents, enum dma_data_direction dir,
                             unsigned long attrs)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i)
                arc_dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir,
                                   attrs);
}
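
/*
 * Illustrative sketch (not part of this file): scatter-gather mapping from a
 * driver; "mydev" and the sg_table "sgt" are hypothetical.
 *
 *        int n = dma_map_sg(&mydev->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
 *        if (!n)
 *                return -ENOMEM;
 *        (program the device with sg_dma_address()/sg_dma_len() of each entry)
 *        dma_unmap_sg(&mydev->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
 */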

static void arc_dma_sync_single_for_cpu(struct device *dev,
                dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
        /* Invalidate so the CPU re-reads what the device wrote (dir ignored) */
        _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE);
}

static void arc_dma_sync_single_for_device(struct device *dev,
                dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
        /* Write back so the device sees the CPU's updates (dir ignored) */
        _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE);
}

static void arc_dma_sync_sg_for_cpu(struct device *dev,
                struct scatterlist *sglist, int nelems,
                enum dma_data_direction dir)
{
        int i;
        struct scatterlist *sg;

        for_each_sg(sglist, sg, nelems, i)
                _dma_cache_sync(sg_phys(sg), sg->length, dir);
}

static void arc_dma_sync_sg_for_device(struct device *dev,
                struct scatterlist *sglist, int nelems,
                enum dma_data_direction dir)
{
        int i;
        struct scatterlist *sg;

        for_each_sg(sglist, sg, nelems, i)
                _dma_cache_sync(sg_phys(sg), sg->length, dir);
}
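
/*
 * Illustrative sketch (not part of this file): when the CPU needs to peek at
 * a buffer that stays mapped (e.g. a descriptor ring), ownership is bounced
 * with the sync calls; "mydev", "dma" and "len" are hypothetical.
 *
 *        dma_sync_single_for_cpu(&mydev->dev, dma, len, DMA_FROM_DEVICE);
 *        (CPU inspects the data)
 *        dma_sync_single_for_device(&mydev->dev, dma, len, DMA_FROM_DEVICE);
 */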

static int arc_dma_supported(struct device *dev, u64 dma_mask)
{
        /* Support 32 bit DMA mask exclusively */
        return dma_mask == DMA_BIT_MASK(32);
}
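
/*
 * Illustrative sketch (not part of this file): a driver probing on ARC would
 * negotiate its mask via the generic helper, which lands in
 * arc_dma_supported(); anything other than a full 32-bit mask is rejected.
 * "pdev" is a hypothetical platform device.
 *
 *        if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
 *                return -EIO;
 */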

const struct dma_map_ops arc_dma_ops = {
        .alloc                  = arc_dma_alloc,
        .free                   = arc_dma_free,
        .mmap                   = arc_dma_mmap,
        .map_page               = arc_dma_map_page,
        .unmap_page             = arc_dma_unmap_page,
        .map_sg                 = arc_dma_map_sg,
        .unmap_sg               = arc_dma_unmap_sg,
        .sync_single_for_device = arc_dma_sync_single_for_device,
        .sync_single_for_cpu    = arc_dma_sync_single_for_cpu,
        .sync_sg_for_cpu        = arc_dma_sync_sg_for_cpu,
        .sync_sg_for_device     = arc_dma_sync_sg_for_device,
        .dma_supported          = arc_dma_supported,
};
EXPORT_SYMBOL(arc_dma_ops);