linux/arch/tile/kernel/pci-dma.c
<<
>>
Prefs
   1/*
   2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
   3 *
   4 *   This program is free software; you can redistribute it and/or
   5 *   modify it under the terms of the GNU General Public License
   6 *   as published by the Free Software Foundation, version 2.
   7 *
   8 *   This program is distributed in the hope that it will be useful, but
   9 *   WITHOUT ANY WARRANTY; without even the implied warranty of
  10 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 *   NON INFRINGEMENT.  See the GNU General Public License for
  12 *   more details.
  13 */
  14
  15#include <linux/mm.h>
  16#include <linux/dma-mapping.h>
  17#include <linux/swiotlb.h>
  18#include <linux/vmalloc.h>
  19#include <linux/export.h>
  20#include <asm/tlbflush.h>
  21#include <asm/homecache.h>
  22
  23/* Generic DMA mapping functions: */
  24
  25/*
  26 * Allocate what Linux calls "coherent" memory.  On TILEPro this is
  27 * uncached memory; on TILE-Gx it is hash-for-home memory.
  28 */
  29#ifdef __tilepro__
  30#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
  31#else
  32#define PAGE_HOME_DMA PAGE_HOME_HASH
  33#endif
  34
  35static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
  36                                     dma_addr_t *dma_handle, gfp_t gfp,
  37                                     unsigned long attrs)
  38{
  39        u64 dma_mask = (dev && dev->coherent_dma_mask) ?
  40                dev->coherent_dma_mask : DMA_BIT_MASK(32);
  41        int node = dev ? dev_to_node(dev) : 0;
  42        int order = get_order(size);
  43        struct page *pg;
  44        dma_addr_t addr;
  45
  46        gfp |= __GFP_ZERO;
  47
  48        /*
  49         * If the mask specifies that the memory be in the first 4 GB, then
  50         * we force the allocation to come from the DMA zone.  We also
  51         * force the node to 0 since that's the only node where the DMA
  52         * zone isn't empty.  If the mask size is smaller than 32 bits, we
  53         * may still not be able to guarantee a suitable memory address, in
  54         * which case we will return NULL.  But such devices are uncommon.
  55         */
  56        if (dma_mask <= DMA_BIT_MASK(32)) {
  57                gfp |= GFP_DMA32;
  58                node = 0;
  59        }
  60
  61        pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
  62        if (pg == NULL)
  63                return NULL;
  64
  65        addr = page_to_phys(pg);
  66        if (addr + size > dma_mask) {
  67                __homecache_free_pages(pg, order);
  68                return NULL;
  69        }
  70
  71        *dma_handle = addr;
  72
  73        return page_address(pg);
  74}
  75
  76/*
  77 * Free memory that was allocated with tile_dma_alloc_coherent.
  78 */
  79static void tile_dma_free_coherent(struct device *dev, size_t size,
  80                                   void *vaddr, dma_addr_t dma_handle,
  81                                   unsigned long attrs)
  82{
  83        homecache_free_pages((unsigned long)vaddr, get_order(size));
  84}
  85
  86/*
  87 * The map routines "map" the specified address range for DMA
  88 * accesses.  The memory belongs to the device after this call is
  89 * issued, until it is unmapped with dma_unmap_single.
  90 *
  91 * We don't need to do any mapping, we just flush the address range
  92 * out of the cache and return a DMA address.
  93 *
  94 * The unmap routines do whatever is necessary before the processor
  95 * accesses the memory again, and must be called before the driver
  96 * touches the memory.  We can get away with a cache invalidate if we
  97 * can count on nothing having been touched.
  98 */
  99
 100/* Set up a single page for DMA access. */
 101static void __dma_prep_page(struct page *page, unsigned long offset,
 102                            size_t size, enum dma_data_direction direction)
 103{
 104        /*
 105         * Flush the page from cache if necessary.
 106         * On tilegx, data is delivered to hash-for-home L3; on tilepro,
 107         * data is delivered direct to memory.
 108         *
 109         * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
 110         * this to be a "flush" not a "finv" and keep some of the
 111         * state in cache across the DMA operation, but it doesn't seem
 112         * worth creating the necessary flush_buffer_xxx() infrastructure.
 113         */
 114        int home = page_home(page);
 115        switch (home) {
 116        case PAGE_HOME_HASH:
 117#ifdef __tilegx__
 118                return;
 119#endif
 120                break;
 121        case PAGE_HOME_UNCACHED:
 122#ifdef __tilepro__
 123                return;
 124#endif
 125                break;
 126        case PAGE_HOME_IMMUTABLE:
 127                /* Should be going to the device only. */
 128                BUG_ON(direction == DMA_FROM_DEVICE ||
 129                       direction == DMA_BIDIRECTIONAL);
 130                return;
 131        case PAGE_HOME_INCOHERENT:
 132                /* Incoherent anyway, so no need to work hard here. */
 133                return;
 134        default:
 135                BUG_ON(home < 0 || home >= NR_CPUS);
 136                break;
 137        }
 138        homecache_finv_page(page);
 139
 140#ifdef DEBUG_ALIGNMENT
 141        /* Warn if the region isn't cacheline aligned. */
 142        if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
 143                pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
 144                        PFN_PHYS(page_to_pfn(page)) + offset, size);
 145#endif
 146}
 147
 148/* Make the page ready to be read by the core. */
 149static void __dma_complete_page(struct page *page, unsigned long offset,
 150                                size_t size, enum dma_data_direction direction)
 151{
 152#ifdef __tilegx__
 153        switch (page_home(page)) {
 154        case PAGE_HOME_HASH:
 155                /* I/O device delivered data the way the cpu wanted it. */
 156                break;
 157        case PAGE_HOME_INCOHERENT:
 158                /* Incoherent anyway, so no need to work hard here. */
 159                break;
 160        case PAGE_HOME_IMMUTABLE:
 161                /* Extra read-only copies are not a problem. */
 162                break;
 163        default:
 164                /* Flush the bogus hash-for-home I/O entries to memory. */
 165                homecache_finv_map_page(page, PAGE_HOME_HASH);
 166                break;
 167        }
 168#endif
 169}
 170
 171static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
 172                                enum dma_data_direction direction)
 173{
 174        struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
 175        unsigned long offset = dma_addr & (PAGE_SIZE - 1);
 176        size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
 177
 178        while (size != 0) {
 179                __dma_prep_page(page, offset, bytes, direction);
 180                size -= bytes;
 181                ++page;
 182                offset = 0;
 183                bytes = min((size_t)PAGE_SIZE, size);
 184        }
 185}
 186
 187static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
 188                                    enum dma_data_direction direction)
 189{
 190        struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
 191        unsigned long offset = dma_addr & (PAGE_SIZE - 1);
 192        size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
 193
 194        while (size != 0) {
 195                __dma_complete_page(page, offset, bytes, direction);
 196                size -= bytes;
 197                ++page;
 198                offset = 0;
 199                bytes = min((size_t)PAGE_SIZE, size);
 200        }
 201}
 202
 203static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 204                           int nents, enum dma_data_direction direction,
 205                           unsigned long attrs)
 206{
 207        struct scatterlist *sg;
 208        int i;
 209
 210        BUG_ON(!valid_dma_direction(direction));
 211
 212        WARN_ON(nents == 0 || sglist->length == 0);
 213
 214        for_each_sg(sglist, sg, nents, i) {
 215                sg->dma_address = sg_phys(sg);
 216#ifdef CONFIG_NEED_SG_DMA_LENGTH
 217                sg->dma_length = sg->length;
 218#endif
 219                if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
 220                        continue;
 221                __dma_prep_pa_range(sg->dma_address, sg->length, direction);
 222        }
 223
 224        return nents;
 225}
 226
 227static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 228                              int nents, enum dma_data_direction direction,
 229                              unsigned long attrs)
 230{
 231        struct scatterlist *sg;
 232        int i;
 233
 234        BUG_ON(!valid_dma_direction(direction));
 235        for_each_sg(sglist, sg, nents, i) {
 236                sg->dma_address = sg_phys(sg);
 237                if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
 238                        continue;
 239                __dma_complete_pa_range(sg->dma_address, sg->length,
 240                                        direction);
 241        }
 242}
 243
 244static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
 245                                    unsigned long offset, size_t size,
 246                                    enum dma_data_direction direction,
 247                                    unsigned long attrs)
 248{
 249        BUG_ON(!valid_dma_direction(direction));
 250
 251        BUG_ON(offset + size > PAGE_SIZE);
 252        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 253                __dma_prep_page(page, offset, size, direction);
 254
 255        return page_to_pa(page) + offset;
 256}
 257
 258static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 259                                size_t size, enum dma_data_direction direction,
 260                                unsigned long attrs)
 261{
 262        BUG_ON(!valid_dma_direction(direction));
 263
 264        if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
 265                return;
 266
 267        __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
 268                            dma_address & (PAGE_SIZE - 1), size, direction);
 269}
 270
 271static void tile_dma_sync_single_for_cpu(struct device *dev,
 272                                         dma_addr_t dma_handle,
 273                                         size_t size,
 274                                         enum dma_data_direction direction)
 275{
 276        BUG_ON(!valid_dma_direction(direction));
 277
 278        __dma_complete_pa_range(dma_handle, size, direction);
 279}
 280
 281static void tile_dma_sync_single_for_device(struct device *dev,
 282                                            dma_addr_t dma_handle, size_t size,
 283                                            enum dma_data_direction direction)
 284{
 285        __dma_prep_pa_range(dma_handle, size, direction);
 286}
 287
 288static void tile_dma_sync_sg_for_cpu(struct device *dev,
 289                                     struct scatterlist *sglist, int nelems,
 290                                     enum dma_data_direction direction)
 291{
 292        struct scatterlist *sg;
 293        int i;
 294
 295        BUG_ON(!valid_dma_direction(direction));
 296        WARN_ON(nelems == 0 || sglist->length == 0);
 297
 298        for_each_sg(sglist, sg, nelems, i) {
 299                dma_sync_single_for_cpu(dev, sg->dma_address,
 300                                        sg_dma_len(sg), direction);
 301        }
 302}
 303
 304static void tile_dma_sync_sg_for_device(struct device *dev,
 305                                        struct scatterlist *sglist, int nelems,
 306                                        enum dma_data_direction direction)
 307{
 308        struct scatterlist *sg;
 309        int i;
 310
 311        BUG_ON(!valid_dma_direction(direction));
 312        WARN_ON(nelems == 0 || sglist->length == 0);
 313
 314        for_each_sg(sglist, sg, nelems, i) {
 315                dma_sync_single_for_device(dev, sg->dma_address,
 316                                           sg_dma_len(sg), direction);
 317        }
 318}
 319
 320static const struct dma_map_ops tile_default_dma_map_ops = {
 321        .alloc = tile_dma_alloc_coherent,
 322        .free = tile_dma_free_coherent,
 323        .map_page = tile_dma_map_page,
 324        .unmap_page = tile_dma_unmap_page,
 325        .map_sg = tile_dma_map_sg,
 326        .unmap_sg = tile_dma_unmap_sg,
 327        .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
 328        .sync_single_for_device = tile_dma_sync_single_for_device,
 329        .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
 330        .sync_sg_for_device = tile_dma_sync_sg_for_device,
 331};
 332
 333const struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
 334EXPORT_SYMBOL(tile_dma_map_ops);
 335
 336/* Generic PCI DMA mapping functions */
 337
 338static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
 339                                         dma_addr_t *dma_handle, gfp_t gfp,
 340                                         unsigned long attrs)
 341{
 342        int node = dev_to_node(dev);
 343        int order = get_order(size);
 344        struct page *pg;
 345        dma_addr_t addr;
 346
 347        gfp |= __GFP_ZERO;
 348
 349        pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
 350        if (pg == NULL)
 351                return NULL;
 352
 353        addr = page_to_phys(pg);
 354
 355        *dma_handle = addr + get_dma_offset(dev);
 356
 357        return page_address(pg);
 358}
 359
 360/*
 361 * Free memory that was allocated with tile_pci_dma_alloc_coherent.
 362 */
 363static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
 364                                       void *vaddr, dma_addr_t dma_handle,
 365                                       unsigned long attrs)
 366{
 367        homecache_free_pages((unsigned long)vaddr, get_order(size));
 368}
 369
 370static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 371                               int nents, enum dma_data_direction direction,
 372                               unsigned long attrs)
 373{
 374        struct scatterlist *sg;
 375        int i;
 376
 377        BUG_ON(!valid_dma_direction(direction));
 378
 379        WARN_ON(nents == 0 || sglist->length == 0);
 380
 381        for_each_sg(sglist, sg, nents, i) {
 382                sg->dma_address = sg_phys(sg);
 383                __dma_prep_pa_range(sg->dma_address, sg->length, direction);
 384
 385                sg->dma_address = sg->dma_address + get_dma_offset(dev);
 386#ifdef CONFIG_NEED_SG_DMA_LENGTH
 387                sg->dma_length = sg->length;
 388#endif
 389        }
 390
 391        return nents;
 392}
 393
 394static void tile_pci_dma_unmap_sg(struct device *dev,
 395                                  struct scatterlist *sglist, int nents,
 396                                  enum dma_data_direction direction,
 397                                  unsigned long attrs)
 398{
 399        struct scatterlist *sg;
 400        int i;
 401
 402        BUG_ON(!valid_dma_direction(direction));
 403        for_each_sg(sglist, sg, nents, i) {
 404                sg->dma_address = sg_phys(sg);
 405                __dma_complete_pa_range(sg->dma_address, sg->length,
 406                                        direction);
 407        }
 408}
 409
 410static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
 411                                        unsigned long offset, size_t size,
 412                                        enum dma_data_direction direction,
 413                                        unsigned long attrs)
 414{
 415        BUG_ON(!valid_dma_direction(direction));
 416
 417        BUG_ON(offset + size > PAGE_SIZE);
 418        __dma_prep_page(page, offset, size, direction);
 419
 420        return page_to_pa(page) + offset + get_dma_offset(dev);
 421}
 422
 423static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 424                                    size_t size,
 425                                    enum dma_data_direction direction,
 426                                    unsigned long attrs)
 427{
 428        BUG_ON(!valid_dma_direction(direction));
 429
 430        dma_address -= get_dma_offset(dev);
 431
 432        __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
 433                            dma_address & (PAGE_SIZE - 1), size, direction);
 434}
 435
 436static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
 437                                             dma_addr_t dma_handle,
 438                                             size_t size,
 439                                             enum dma_data_direction direction)
 440{
 441        BUG_ON(!valid_dma_direction(direction));
 442
 443        dma_handle -= get_dma_offset(dev);
 444
 445        __dma_complete_pa_range(dma_handle, size, direction);
 446}
 447
 448static void tile_pci_dma_sync_single_for_device(struct device *dev,
 449                                                dma_addr_t dma_handle,
 450                                                size_t size,
 451                                                enum dma_data_direction
 452                                                direction)
 453{
 454        dma_handle -= get_dma_offset(dev);
 455
 456        __dma_prep_pa_range(dma_handle, size, direction);
 457}
 458
 459static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
 460                                         struct scatterlist *sglist,
 461                                         int nelems,
 462                                         enum dma_data_direction direction)
 463{
 464        struct scatterlist *sg;
 465        int i;
 466
 467        BUG_ON(!valid_dma_direction(direction));
 468        WARN_ON(nelems == 0 || sglist->length == 0);
 469
 470        for_each_sg(sglist, sg, nelems, i) {
 471                dma_sync_single_for_cpu(dev, sg->dma_address,
 472                                        sg_dma_len(sg), direction);
 473        }
 474}
 475
 476static void tile_pci_dma_sync_sg_for_device(struct device *dev,
 477                                            struct scatterlist *sglist,
 478                                            int nelems,
 479                                            enum dma_data_direction direction)
 480{
 481        struct scatterlist *sg;
 482        int i;
 483
 484        BUG_ON(!valid_dma_direction(direction));
 485        WARN_ON(nelems == 0 || sglist->length == 0);
 486
 487        for_each_sg(sglist, sg, nelems, i) {
 488                dma_sync_single_for_device(dev, sg->dma_address,
 489                                           sg_dma_len(sg), direction);
 490        }
 491}
 492
 493static const struct dma_map_ops tile_pci_default_dma_map_ops = {
 494        .alloc = tile_pci_dma_alloc_coherent,
 495        .free = tile_pci_dma_free_coherent,
 496        .map_page = tile_pci_dma_map_page,
 497        .unmap_page = tile_pci_dma_unmap_page,
 498        .map_sg = tile_pci_dma_map_sg,
 499        .unmap_sg = tile_pci_dma_unmap_sg,
 500        .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
 501        .sync_single_for_device = tile_pci_dma_sync_single_for_device,
 502        .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
 503        .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
 504};
 505
 506const struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
 507EXPORT_SYMBOL(gx_pci_dma_map_ops);
 508
 509/* PCI DMA mapping functions for legacy PCI devices */
 510
 511#ifdef CONFIG_SWIOTLB
 512static const struct dma_map_ops pci_hybrid_dma_ops = {
 513        .alloc = swiotlb_alloc,
 514        .free = swiotlb_free,
 515        .map_page = tile_pci_dma_map_page,
 516        .unmap_page = tile_pci_dma_unmap_page,
 517        .map_sg = tile_pci_dma_map_sg,
 518        .unmap_sg = tile_pci_dma_unmap_sg,
 519        .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
 520        .sync_single_for_device = tile_pci_dma_sync_single_for_device,
 521        .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
 522        .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
 523};
 524
 525const struct dma_map_ops *gx_legacy_pci_dma_map_ops = &swiotlb_dma_ops;
 526const struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
 527#else
 528const struct dma_map_ops *gx_legacy_pci_dma_map_ops;
 529const struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
 530#endif
 531EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
 532EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops);
 533
 534int dma_set_mask(struct device *dev, u64 mask)
 535{
 536        const struct dma_map_ops *dma_ops = get_dma_ops(dev);
 537
 538        /*
 539         * For PCI devices with 64-bit DMA addressing capability, promote
 540         * the dma_ops to hybrid, with the consistent memory DMA space limited
 541         * to 32-bit. For 32-bit capable devices, limit the streaming DMA
 542         * address range to max_direct_dma_addr.
 543         */
 544        if (dma_ops == gx_pci_dma_map_ops ||
 545            dma_ops == gx_hybrid_pci_dma_map_ops ||
 546            dma_ops == gx_legacy_pci_dma_map_ops) {
 547                if (mask == DMA_BIT_MASK(64) &&
 548                    dma_ops == gx_legacy_pci_dma_map_ops)
 549                        set_dma_ops(dev, gx_hybrid_pci_dma_map_ops);
 550                else if (mask > dev->archdata.max_direct_dma_addr)
 551                        mask = dev->archdata.max_direct_dma_addr;
 552        }
 553
 554        if (!dev->dma_mask || !dma_supported(dev, mask))
 555                return -EIO;
 556
 557        *dev->dma_mask = mask;
 558
 559        return 0;
 560}
 561EXPORT_SYMBOL(dma_set_mask);
 562
 563#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
 564int dma_set_coherent_mask(struct device *dev, u64 mask)
 565{
 566        const struct dma_map_ops *dma_ops = get_dma_ops(dev);
 567
 568        /*
 569         * For PCI devices with 64-bit DMA addressing capability, promote
 570         * the dma_ops to full capability for both streams and consistent
 571         * memory access. For 32-bit capable devices, limit the consistent 
 572         * memory DMA range to max_direct_dma_addr.
 573         */
 574        if (dma_ops == gx_pci_dma_map_ops ||
 575            dma_ops == gx_hybrid_pci_dma_map_ops ||
 576            dma_ops == gx_legacy_pci_dma_map_ops) {
 577                if (mask == DMA_BIT_MASK(64))
 578                        set_dma_ops(dev, gx_pci_dma_map_ops);
 579                else if (mask > dev->archdata.max_direct_dma_addr)
 580                        mask = dev->archdata.max_direct_dma_addr;
 581        }
 582
 583        if (!dma_supported(dev, mask))
 584                return -EIO;
 585        dev->coherent_dma_mask = mask;
 586        return 0;
 587}
 588EXPORT_SYMBOL(dma_set_coherent_mask);
 589#endif
 590
 591#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 592/*
 593 * The generic dma_get_required_mask() uses the highest physical address
 594 * (max_pfn) to provide the hint to the PCI drivers regarding 32-bit or
 595 * 64-bit DMA configuration. Since TILEGx has I/O TLB/MMU, allowing the
 596 * DMAs to use the full 64-bit PCI address space and not limited by
 597 * the physical memory space, we always let the PCI devices use
 598 * 64-bit DMA if they have that capability, by returning the 64-bit
 599 * DMA mask here. The device driver has the option to use 32-bit DMA if
 600 * the device is not capable of 64-bit DMA.
 601 */
 602u64 dma_get_required_mask(struct device *dev)
 603{
 604        return DMA_BIT_MASK(64);
 605}
 606EXPORT_SYMBOL_GPL(dma_get_required_mask);
 607#endif
 608