linux/kernel/dma/direct.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Christoph Hellwig.
 *
 * DMA operations that map physical memory directly without using an IOMMU.
 */
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/set_memory.h>
#include <linux/swiotlb.h>

/*
 * Most architectures use ZONE_DMA for the first 16 Megabytes, but
 * some use it for entirely different regions:
 */
#ifndef ARCH_ZONE_DMA_BITS
#define ARCH_ZONE_DMA_BITS 24
#endif
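
/*
 * With the default of 24 bits, DMA_BIT_MASK(ARCH_ZONE_DMA_BITS) is
 * 0x00ffffff, i.e. GFP_DMA allocations are confined to the first 16 MiB
 * of physical address space.
 */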

/*
 * For AMD SEV all DMA must be to unencrypted addresses.
 */
static inline bool force_dma_unencrypted(void)
{
        return sev_active();
}

static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
        if (!dev->dma_mask) {
                dev_err_once(dev, "DMA map on device without dma_mask\n");
        } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
                dev_err_once(dev,
                        "overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
                        &dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
        }
        WARN_ON_ONCE(1);
}

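/*
 * phys_to_dma() includes the memory encryption bit in the returned bus
 * address, while __phys_to_dma() does not.  When DMA has to target
 * unencrypted memory (SEV), the raw variant is used so that the
 * encryption bit never ends up in the handle given to the device.
 */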
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
                phys_addr_t phys)
{
        if (force_dma_unencrypted())
                return __phys_to_dma(dev, phys);
        return phys_to_dma(dev, phys);
}

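/*
 * Round the highest usable DMA address up to a power-of-two-minus-one
 * mask.  For example (illustrative value): with max_dma == 0x23fffffff,
 * fls64() returns 34 and the reported mask is DMA_BIT_MASK(34), i.e.
 * 0x3ffffffff.
 */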
u64 dma_direct_get_required_mask(struct device *dev)
{
        u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

        if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma)
                max_dma = dev->bus_dma_mask;

        return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}

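/*
 * Pick the GFP zone flag matching the addressing limit.  With the default
 * ARCH_ZONE_DMA_BITS of 24: a physical mask of up to 24 bits maps to
 * GFP_DMA, up to 32 bits to GFP_DMA32, and anything wider needs no zone
 * restriction at all.
 */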
static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
                u64 *phys_mask)
{
        if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
                dma_mask = dev->bus_dma_mask;

        if (force_dma_unencrypted())
                *phys_mask = __dma_to_phys(dev, dma_mask);
        else
                *phys_mask = dma_to_phys(dev, dma_mask);

        /*
         * Optimistically try the zone that the physical address mask falls
         * into first.  If that returns memory that isn't actually addressable
         * we will fall back to the next lower zone and try again.
         *
         * Note that GFP_DMA32 and GFP_DMA are no-ops without the corresponding
         * zones.
         */
        if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
                return GFP_DMA;
        if (*phys_mask <= DMA_BIT_MASK(32))
                return GFP_DMA32;
        return 0;
}

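/*
 * Check that the last byte of the buffer is reachable through the smaller
 * of the coherent DMA mask and the bus DMA mask (min_not_zero() copes with
 * an unset bus mask).
 */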
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
        return phys_to_dma_direct(dev, phys) + size - 1 <=
                        min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
}

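/*
 * Allocate the backing pages: try CMA first when the context may sleep,
 * then fall back to the page allocator.  If the memory that comes back is
 * not addressable by the device, free it and retry in a lower zone
 * (GFP_DMA32, then GFP_DMA).
 */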
struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
        unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        int page_order = get_order(size);
        struct page *page = NULL;
        u64 phys_mask;

        if (attrs & DMA_ATTR_NO_WARN)
                gfp |= __GFP_NOWARN;

        /* we always manually zero the memory once we are done: */
        gfp &= ~__GFP_ZERO;
        gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
                        &phys_mask);
again:
        /* CMA can only be used in a context that permits sleeping */
        if (gfpflags_allow_blocking(gfp)) {
                page = dma_alloc_from_contiguous(dev, count, page_order,
                                                 gfp & __GFP_NOWARN);
                if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
                        dma_release_from_contiguous(dev, page, count);
                        page = NULL;
                }
        }
        if (!page)
                page = alloc_pages_node(dev_to_node(dev), gfp, page_order);

        if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
                __free_pages(page, page_order);
                page = NULL;

                if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
                    phys_mask < DMA_BIT_MASK(64) &&
                    !(gfp & (GFP_DMA32 | GFP_DMA))) {
                        gfp |= GFP_DMA32;
                        goto again;
                }

                if (IS_ENABLED(CONFIG_ZONE_DMA) &&
                    phys_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) {
                        gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
                        goto again;
                }
        }

        return page;
}

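/*
 * Allocate coherent memory and return its kernel virtual address.  Highmem
 * pages from CMA are rejected because without remapping there is no kernel
 * mapping to hand back.  For SEV the pages are marked decrypted and the
 * DMA handle is generated without the encryption bit.
 */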
void *dma_direct_alloc_pages(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
        struct page *page;
        void *ret;

        page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
        if (!page)
                return NULL;

        if (PageHighMem(page)) {
                /*
                 * Depending on the cma= arguments and per-arch setup
                 * dma_alloc_from_contiguous could return highmem pages.
                 * Without remapping there is no way to return them here,
                 * so log an error and fail.
                 */
                dev_info(dev, "Rejecting highmem page from CMA.\n");
                __dma_direct_free_pages(dev, size, page);
                return NULL;
        }

        ret = page_address(page);
        if (force_dma_unencrypted()) {
                set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
                *dma_handle = __phys_to_dma(dev, page_to_phys(page));
        } else {
                *dma_handle = phys_to_dma(dev, page_to_phys(page));
        }
        memset(ret, 0, size);
        return ret;
}

void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
{
        unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;

        if (!dma_release_from_contiguous(dev, page, count))
                __free_pages(page, get_order(size));
}

void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
                dma_addr_t dma_addr, unsigned long attrs)
{
        unsigned int page_order = get_order(size);

        if (force_dma_unencrypted())
                set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
        __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}

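/*
 * Devices that are not cache coherent with the CPU take the arch_dma_alloc()
 * path, which typically hands back memory with uncached attributes; fully
 * coherent devices get plain direct-mapped pages.
 */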
void *dma_direct_alloc(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
        if (!dev_is_dma_coherent(dev))
                return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
        return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}

void dma_direct_free(struct device *dev, size_t size,
                void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
        if (!dev_is_dma_coherent(dev))
                arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
        else
                dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}

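/*
 * The sync helpers below are only built when the architecture needs explicit
 * cache maintenance or when swiotlb bounce buffering may be in use.  For the
 * device direction the bounce buffer is filled first, then the CPU caches are
 * written back for non-coherent devices.
 */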
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_device(struct device *dev,
                dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = dma_to_phys(dev, addr);

        if (unlikely(is_swiotlb_buffer(paddr)))
                swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);

        if (!dev_is_dma_coherent(dev))
                arch_sync_dma_for_device(dev, paddr, size, dir);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_device);

void dma_direct_sync_sg_for_device(struct device *dev,
                struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nents, i) {
                if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
                        swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
                                        dir, SYNC_FOR_DEVICE);

                if (!dev_is_dma_coherent(dev))
                        arch_sync_dma_for_device(dev, sg_phys(sg), sg->length,
                                        dir);
        }
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
#endif

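/*
 * For the CPU direction the order is reversed: the architecture invalidates
 * its caches first, then any swiotlb bounce buffer contents are copied back
 * to the original buffer.
 */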
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_cpu(struct device *dev,
                dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = dma_to_phys(dev, addr);

        if (!dev_is_dma_coherent(dev)) {
                arch_sync_dma_for_cpu(dev, paddr, size, dir);
                arch_sync_dma_for_cpu_all(dev);
        }

        if (unlikely(is_swiotlb_buffer(paddr)))
                swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);

void dma_direct_sync_sg_for_cpu(struct device *dev,
                struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nents, i) {
                if (!dev_is_dma_coherent(dev))
                        arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);

                if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
                        swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir,
                                        SYNC_FOR_CPU);
        }

        if (!dev_is_dma_coherent(dev))
                arch_sync_dma_for_cpu_all(dev);
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);

void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
                size_t size, enum dma_data_direction dir, unsigned long attrs)
{
        phys_addr_t phys = dma_to_phys(dev, addr);

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                dma_direct_sync_single_for_cpu(dev, addr, size, dir);

        if (unlikely(is_swiotlb_buffer(phys)))
                swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_page);

void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
                int nents, enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nents, i)
                dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
                                attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif

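/*
 * The direct mapping can be used as long as swiotlb is not forced on and the
 * device can address the resulting bus address; otherwise dma_direct_map_page()
 * falls back to a swiotlb bounce buffer.
 */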
static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
                size_t size)
{
        return swiotlb_force != SWIOTLB_FORCE &&
                (!dev || dma_capable(dev, dma_addr, size));
}

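/*
 * A driver typically ends up here through the generic DMA API when the device
 * uses the direct mapping, e.g. (illustrative):
 *
 *	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *	if (dma_mapping_error(dev, handle))
 *		return -ENOMEM;
 *
 * For non-coherent devices the CPU caches are written back before the device
 * may access the buffer, unless DMA_ATTR_SKIP_CPU_SYNC is set.
 */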
dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
                unsigned long offset, size_t size, enum dma_data_direction dir,
                unsigned long attrs)
{
        phys_addr_t phys = page_to_phys(page) + offset;
        dma_addr_t dma_addr = phys_to_dma(dev, phys);

        if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
            !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
                report_addr(dev, dma_addr, size);
                return DMA_MAPPING_ERROR;
        }

        if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                arch_sync_dma_for_device(dev, phys, size, dir);
        return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);

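/*
 * Map each scatterlist segment with dma_direct_map_page().  If any segment
 * fails, undo the segments mapped so far (skipping the CPU sync) and return 0
 * as required by the DMA API.
 */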
int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
                enum dma_data_direction dir, unsigned long attrs)
{
        int i;
        struct scatterlist *sg;

        for_each_sg(sgl, sg, nents, i) {
                sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
                                sg->offset, sg->length, dir, attrs);
                if (sg->dma_address == DMA_MAPPING_ERROR)
                        goto out_unmap;
                sg_dma_len(sg) = sg->length;
        }

        return nents;

out_unmap:
        dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
        return 0;
}
EXPORT_SYMBOL(dma_direct_map_sg);

/*
 * Because 32-bit DMA masks are so common we expect every architecture to be
 * able to satisfy them: either by not supporting more physical memory, or by
 * providing a ZONE_DMA32.  If neither is the case, the architecture needs to
 * use an IOMMU instead of the direct mapping.
 */
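/*
 * min_mask below is additionally clamped to the highest physical address that
 * actually exists.  Assuming a 1:1 phys/DMA translation, a machine with only
 * 1 GiB of RAM and no ZONE_DMA will therefore accept even a 30-bit mask.
 */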
int dma_direct_supported(struct device *dev, u64 mask)
{
        u64 min_mask;

        if (IS_ENABLED(CONFIG_ZONE_DMA))
                min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS);
        else
                min_mask = DMA_BIT_MASK(32);

        min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);

        /*
         * This check needs to be against the actual bit mask value, so
         * use __phys_to_dma() here so that the SME encryption mask isn't
         * part of the check.
         */
        return mask >= __phys_to_dma(dev, min_mask);
}