linux/drivers/iommu/dma-iommu.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * A fairly generic DMA-API to IOMMU-API glue layer.
   4 *
   5 * Copyright (C) 2014-2015 ARM Ltd.
   6 *
   7 * based in part on arch/arm/mm/dma-mapping.c:
   8 * Copyright (C) 2000-2004 Russell King
   9 */
  10
  11#include <linux/acpi_iort.h>
  12#include <linux/device.h>
  13#include <linux/dma-map-ops.h>
  14#include <linux/dma-iommu.h>
  15#include <linux/gfp.h>
  16#include <linux/huge_mm.h>
  17#include <linux/iommu.h>
  18#include <linux/iova.h>
  19#include <linux/irq.h>
  20#include <linux/mm.h>
  21#include <linux/mutex.h>
  22#include <linux/pci.h>
  23#include <linux/swiotlb.h>
  24#include <linux/scatterlist.h>
  25#include <linux/vmalloc.h>
  26#include <linux/crash_dump.h>
  27#include <linux/dma-direct.h>
  28
  29struct iommu_dma_msi_page {
  30        struct list_head        list;
  31        dma_addr_t              iova;
  32        phys_addr_t             phys;
  33};
  34
  35enum iommu_dma_cookie_type {
  36        IOMMU_DMA_IOVA_COOKIE,
  37        IOMMU_DMA_MSI_COOKIE,
  38};
  39
  40struct iommu_dma_cookie {
  41        enum iommu_dma_cookie_type      type;
  42        union {
  43                /* Full allocator for IOMMU_DMA_IOVA_COOKIE */
  44                struct iova_domain      iovad;
  45                /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
  46                dma_addr_t              msi_iova;
  47        };
  48        struct list_head                msi_page_list;
  49
  50        /* Domain for flush queue callback; NULL if flush queue not in use */
  51        struct iommu_domain             *fq_domain;
  52};
  53
  54static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
  55bool iommu_dma_forcedac __read_mostly;
  56
  57static int __init iommu_dma_forcedac_setup(char *str)
  58{
  59        int ret = kstrtobool(str, &iommu_dma_forcedac);
  60
  61        if (!ret && iommu_dma_forcedac)
  62                pr_info("Forcing DAC for PCI devices\n");
  63        return ret;
  64}
  65early_param("iommu.forcedac", iommu_dma_forcedac_setup);
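
/*
 * Example: booting with "iommu.forcedac=1" on the kernel command line sets
 * iommu_dma_forcedac, which makes iommu_dma_alloc_iova() below skip its
 * preference for 32-bit (SAC) addresses and allocate straight from the
 * device's full DMA mask.
 */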
  66
  67static void iommu_dma_entry_dtor(unsigned long data)
  68{
  69        struct page *freelist = (struct page *)data;
  70
  71        while (freelist) {
  72                unsigned long p = (unsigned long)page_address(freelist);
  73
  74                freelist = freelist->freelist;
  75                free_page(p);
  76        }
  77}
  78
  79static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
  80{
  81        if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
  82                return cookie->iovad.granule;
  83        return PAGE_SIZE;
  84}
  85
  86static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
  87{
  88        struct iommu_dma_cookie *cookie;
  89
  90        cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
  91        if (cookie) {
  92                INIT_LIST_HEAD(&cookie->msi_page_list);
  93                cookie->type = type;
  94        }
  95        return cookie;
  96}
  97
  98/**
  99 * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
 100 * @domain: IOMMU domain to prepare for DMA-API usage
 101 *
 102 * IOMMU drivers should normally call this from their domain_alloc
 103 * callback when domain->type == IOMMU_DOMAIN_DMA.
 104 */
 105int iommu_get_dma_cookie(struct iommu_domain *domain)
 106{
 107        if (domain->iova_cookie)
 108                return -EEXIST;
 109
 110        domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
 111        if (!domain->iova_cookie)
 112                return -ENOMEM;
 113
 114        return 0;
 115}
 116EXPORT_SYMBOL(iommu_get_dma_cookie);
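
/*
 * Minimal sketch of the intended usage (hypothetical driver, not part of this
 * file): a driver's domain_alloc callback attaches the cookie for DMA domains,
 * where struct foo_domain embeds a struct iommu_domain as its "domain" member:
 *
 *	static struct iommu_domain *foo_domain_alloc(unsigned type)
 *	{
 *		struct foo_domain *fd;
 *
 *		if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
 *			return NULL;
 *
 *		fd = kzalloc(sizeof(*fd), GFP_KERNEL);
 *		if (!fd)
 *			return NULL;
 *
 *		if (type == IOMMU_DOMAIN_DMA &&
 *		    iommu_get_dma_cookie(&fd->domain)) {
 *			kfree(fd);
 *			return NULL;
 *		}
 *		return &fd->domain;
 *	}
 */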
 117
 118/**
 119 * iommu_get_msi_cookie - Acquire just MSI remapping resources
 120 * @domain: IOMMU domain to prepare
 121 * @base: Start address of IOVA region for MSI mappings
 122 *
 123 * Users who manage their own IOVA allocation and do not want DMA API support,
 124 * but would still like to take advantage of automatic MSI remapping, can use
 125 * this to initialise their own domain appropriately. Users should reserve a
 126 * contiguous IOVA region, starting at @base, large enough to accommodate the
 127 * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
 128 * used by the devices attached to @domain.
 129 */
 130int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
 131{
 132        struct iommu_dma_cookie *cookie;
 133
 134        if (domain->type != IOMMU_DOMAIN_UNMANAGED)
 135                return -EINVAL;
 136
 137        if (domain->iova_cookie)
 138                return -EEXIST;
 139
 140        cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
 141        if (!cookie)
 142                return -ENOMEM;
 143
 144        cookie->msi_iova = base;
 145        domain->iova_cookie = cookie;
 146        return 0;
 147}
 148EXPORT_SYMBOL(iommu_get_msi_cookie);
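
/*
 * Illustrative sketch (hypothetical caller): a user of an UNMANAGED domain,
 * such as a VFIO-style container, reserves an IOVA window for MSI doorbells
 * and hands its start address over before attaching devices:
 *
 *	ret = iommu_get_msi_cookie(domain, resv_msi_base);
 *	if (ret)
 *		goto out_free_domain;
 *
 * where resv_msi_base is the caller-chosen start of that reserved window.
 */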
 149
 150/**
 151 * iommu_put_dma_cookie - Release a domain's DMA mapping resources
 152 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
 153 *          iommu_get_msi_cookie()
 154 *
 155 * IOMMU drivers should normally call this from their domain_free callback.
 156 */
 157void iommu_put_dma_cookie(struct iommu_domain *domain)
 158{
 159        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 160        struct iommu_dma_msi_page *msi, *tmp;
 161
 162        if (!cookie)
 163                return;
 164
 165        if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
 166                put_iova_domain(&cookie->iovad);
 167
 168        list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
 169                list_del(&msi->list);
 170                kfree(msi);
 171        }
 172        kfree(cookie);
 173        domain->iova_cookie = NULL;
 174}
 175EXPORT_SYMBOL(iommu_put_dma_cookie);
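
/*
 * Continuing the hypothetical driver sketch above: the matching domain_free
 * callback releases the cookie before tearing down its own state
 * (to_foo_domain() is an illustrative container_of() helper):
 *
 *	static void foo_domain_free(struct iommu_domain *domain)
 *	{
 *		struct foo_domain *fd = to_foo_domain(domain);
 *
 *		iommu_put_dma_cookie(domain);
 *		kfree(fd);
 *	}
 */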
 176
 177/**
 178 * iommu_dma_get_resv_regions - Reserved region driver helper
 179 * @dev: Device from iommu_get_resv_regions()
 180 * @list: Reserved region list from iommu_get_resv_regions()
 181 *
 182 * IOMMU drivers can use this to implement their .get_resv_regions callback
 183 * for general non-IOMMU-specific reservations. Currently, this covers GICv3
 184 * ITS region reservation on ACPI based ARM platforms that may require HW MSI
  185 * ITS region reservation on ACPI-based ARM platforms that may require HW MSI
 186 */
 187void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 188{
 189
 190        if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
 191                iort_iommu_msi_get_resv_regions(dev, list);
 192
 193}
 194EXPORT_SYMBOL(iommu_dma_get_resv_regions);
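
/*
 * Sketch of how an IOMMU driver commonly combines this with its own software
 * MSI window (names are illustrative; MSI_IOVA_BASE/MSI_IOVA_LENGTH would be
 * driver-chosen constants):
 *
 *	static void foo_get_resv_regions(struct device *dev,
 *					 struct list_head *head)
 *	{
 *		struct iommu_resv_region *region;
 *		int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 *
 *		region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
 *						 prot, IOMMU_RESV_SW_MSI);
 *		if (!region)
 *			return;
 *		list_add_tail(&region->list, head);
 *		iommu_dma_get_resv_regions(dev, head);
 *	}
 */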
 195
 196static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
 197                phys_addr_t start, phys_addr_t end)
 198{
 199        struct iova_domain *iovad = &cookie->iovad;
 200        struct iommu_dma_msi_page *msi_page;
 201        int i, num_pages;
 202
 203        start -= iova_offset(iovad, start);
 204        num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
 205
 206        for (i = 0; i < num_pages; i++) {
 207                msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
 208                if (!msi_page)
 209                        return -ENOMEM;
 210
 211                msi_page->phys = start;
 212                msi_page->iova = start;
 213                INIT_LIST_HEAD(&msi_page->list);
 214                list_add(&msi_page->list, &cookie->msi_page_list);
 215                start += iovad->granule;
 216        }
 217
 218        return 0;
 219}
 220
 221static int iova_reserve_pci_windows(struct pci_dev *dev,
 222                struct iova_domain *iovad)
 223{
 224        struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
 225        struct resource_entry *window;
 226        unsigned long lo, hi;
 227        phys_addr_t start = 0, end;
 228
 229        resource_list_for_each_entry(window, &bridge->windows) {
 230                if (resource_type(window->res) != IORESOURCE_MEM)
 231                        continue;
 232
 233                lo = iova_pfn(iovad, window->res->start - window->offset);
 234                hi = iova_pfn(iovad, window->res->end - window->offset);
 235                reserve_iova(iovad, lo, hi);
 236        }
 237
 238        /* Get reserved DMA windows from host bridge */
 239        resource_list_for_each_entry(window, &bridge->dma_ranges) {
 240                end = window->res->start - window->offset;
 241resv_iova:
 242                if (end > start) {
 243                        lo = iova_pfn(iovad, start);
 244                        hi = iova_pfn(iovad, end);
 245                        reserve_iova(iovad, lo, hi);
 246                } else if (end < start) {
 247                        /* dma_ranges list should be sorted */
 248                        dev_err(&dev->dev,
 249                                "Failed to reserve IOVA [%pa-%pa]\n",
 250                                &start, &end);
 251                        return -EINVAL;
 252                }
 253
 254                start = window->res->end - window->offset + 1;
 255                /* If window is last entry */
 256                if (window->node.next == &bridge->dma_ranges &&
 257                    end != ~(phys_addr_t)0) {
 258                        end = ~(phys_addr_t)0;
 259                        goto resv_iova;
 260                }
 261        }
 262
 263        return 0;
 264}
 265
 266static int iova_reserve_iommu_regions(struct device *dev,
 267                struct iommu_domain *domain)
 268{
 269        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 270        struct iova_domain *iovad = &cookie->iovad;
 271        struct iommu_resv_region *region;
 272        LIST_HEAD(resv_regions);
 273        int ret = 0;
 274
 275        if (dev_is_pci(dev)) {
 276                ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
 277                if (ret)
 278                        return ret;
 279        }
 280
 281        iommu_get_resv_regions(dev, &resv_regions);
 282        list_for_each_entry(region, &resv_regions, list) {
 283                unsigned long lo, hi;
 284
 285                /* We ARE the software that manages these! */
 286                if (region->type == IOMMU_RESV_SW_MSI)
 287                        continue;
 288
 289                lo = iova_pfn(iovad, region->start);
 290                hi = iova_pfn(iovad, region->start + region->length - 1);
 291                reserve_iova(iovad, lo, hi);
 292
 293                if (region->type == IOMMU_RESV_MSI)
 294                        ret = cookie_init_hw_msi_region(cookie, region->start,
 295                                        region->start + region->length);
 296                if (ret)
 297                        break;
 298        }
 299        iommu_put_resv_regions(dev, &resv_regions);
 300
 301        return ret;
 302}
 303
 304static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
 305{
 306        struct iommu_dma_cookie *cookie;
 307        struct iommu_domain *domain;
 308
 309        cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
 310        domain = cookie->fq_domain;
 311
 312        domain->ops->flush_iotlb_all(domain);
 313}
 314
 315static bool dev_is_untrusted(struct device *dev)
 316{
 317        return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 318}
 319
 320/* sysfs updates are serialised by the mutex of the group owning @domain */
 321int iommu_dma_init_fq(struct iommu_domain *domain)
 322{
 323        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 324        int ret;
 325
 326        if (cookie->fq_domain)
 327                return 0;
 328
 329        ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all,
 330                                    iommu_dma_entry_dtor);
 331        if (ret) {
 332                pr_warn("iova flush queue initialization failed\n");
 333                return ret;
 334        }
 335        /*
 336         * Prevent incomplete iovad->fq being observable. Pairs with path from
 337         * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
 338         */
 339        smp_wmb();
 340        WRITE_ONCE(cookie->fq_domain, domain);
 341        return 0;
 342}
 343
 344/**
 345 * iommu_dma_init_domain - Initialise a DMA mapping domain
 346 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
 347 * @base: IOVA at which the mappable address space starts
 348 * @limit: Last address of the IOVA space
 349 * @dev: Device the domain is being initialised for
 350 *
 351 * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
 352 * avoid rounding surprises. If necessary, we reserve the page at address 0
 353 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
 354 * any change which could make prior IOVAs invalid will fail.
 355 */
 356static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 357                                 dma_addr_t limit, struct device *dev)
 358{
 359        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 360        unsigned long order, base_pfn;
 361        struct iova_domain *iovad;
 362
 363        if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
 364                return -EINVAL;
 365
 366        iovad = &cookie->iovad;
 367
 368        /* Use the smallest supported page size for IOVA granularity */
 369        order = __ffs(domain->pgsize_bitmap);
 370        base_pfn = max_t(unsigned long, 1, base >> order);
 371
 372        /* Check the domain allows at least some access to the device... */
 373        if (domain->geometry.force_aperture) {
 374                if (base > domain->geometry.aperture_end ||
 375                    limit < domain->geometry.aperture_start) {
 376                        pr_warn("specified DMA range outside IOMMU capability\n");
 377                        return -EFAULT;
 378                }
 379                /* ...then finally give it a kicking to make sure it fits */
 380                base_pfn = max_t(unsigned long, base_pfn,
 381                                domain->geometry.aperture_start >> order);
 382        }
 383
 384        /* start_pfn is always nonzero for an already-initialised domain */
 385        if (iovad->start_pfn) {
 386                if (1UL << order != iovad->granule ||
 387                    base_pfn != iovad->start_pfn) {
 388                        pr_warn("Incompatible range for DMA domain\n");
 389                        return -EFAULT;
 390                }
 391
 392                return 0;
 393        }
 394
 395        init_iova_domain(iovad, 1UL << order, base_pfn);
 396
 397        /* If the FQ fails we can simply fall back to strict mode */
 398        if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
 399                domain->type = IOMMU_DOMAIN_DMA;
 400
 401        return iova_reserve_iommu_regions(dev, domain);
 402}
 403
 404/**
 405 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
 406 *                    page flags.
 407 * @dir: Direction of DMA transfer
 408 * @coherent: Is the DMA master cache-coherent?
 409 * @attrs: DMA attributes for the mapping
 410 *
 411 * Return: corresponding IOMMU API page protection flags
 412 */
 413static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
 414                     unsigned long attrs)
 415{
 416        int prot = coherent ? IOMMU_CACHE : 0;
 417
 418        if (attrs & DMA_ATTR_PRIVILEGED)
 419                prot |= IOMMU_PRIV;
 420
 421        switch (dir) {
 422        case DMA_BIDIRECTIONAL:
 423                return prot | IOMMU_READ | IOMMU_WRITE;
 424        case DMA_TO_DEVICE:
 425                return prot | IOMMU_READ;
 426        case DMA_FROM_DEVICE:
 427                return prot | IOMMU_WRITE;
 428        default:
 429                return 0;
 430        }
 431}
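
/*
 * Worked example: for a cache-coherent device with no special attributes,
 *
 *	dma_info_to_prot(DMA_TO_DEVICE, true, 0) == IOMMU_READ | IOMMU_CACHE
 *
 * i.e. the device gets a read-only, cacheable view of the buffer.
 */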
 432
 433static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 434                size_t size, u64 dma_limit, struct device *dev)
 435{
 436        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 437        struct iova_domain *iovad = &cookie->iovad;
 438        unsigned long shift, iova_len, iova = 0;
 439
 440        if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
 441                cookie->msi_iova += size;
 442                return cookie->msi_iova - size;
 443        }
 444
 445        shift = iova_shift(iovad);
 446        iova_len = size >> shift;
 447        /*
 448         * Freeing non-power-of-two-sized allocations back into the IOVA caches
 449         * will come back to bite us badly, so we have to waste a bit of space
 450         * rounding up anything cacheable to make sure that can't happen. The
 451         * order of the unadjusted size will still match upon freeing.
 452         */
 453        if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
 454                iova_len = roundup_pow_of_two(iova_len);
 455
 456        dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
 457
 458        if (domain->geometry.force_aperture)
 459                dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
 460
 461        /* Try to get PCI devices a SAC address */
 462        if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
 463                iova = alloc_iova_fast(iovad, iova_len,
 464                                       DMA_BIT_MASK(32) >> shift, false);
 465
 466        if (!iova)
 467                iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
 468                                       true);
 469
 470        return (dma_addr_t)iova << shift;
 471}
 472
 473static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 474                dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
 475{
 476        struct iova_domain *iovad = &cookie->iovad;
 477
 478        /* The MSI case is only ever cleaning up its most recent allocation */
 479        if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 480                cookie->msi_iova -= size;
 481        else if (gather && gather->queued)
 482                queue_iova(iovad, iova_pfn(iovad, iova),
 483                                size >> iova_shift(iovad),
 484                                (unsigned long)gather->freelist);
 485        else
 486                free_iova_fast(iovad, iova_pfn(iovad, iova),
 487                                size >> iova_shift(iovad));
 488}
 489
 490static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
 491                size_t size)
 492{
 493        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 494        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 495        struct iova_domain *iovad = &cookie->iovad;
 496        size_t iova_off = iova_offset(iovad, dma_addr);
 497        struct iommu_iotlb_gather iotlb_gather;
 498        size_t unmapped;
 499
 500        dma_addr -= iova_off;
 501        size = iova_align(iovad, size + iova_off);
 502        iommu_iotlb_gather_init(&iotlb_gather);
 503        iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
 504
 505        unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
 506        WARN_ON(unmapped != size);
 507
 508        if (!iotlb_gather.queued)
 509                iommu_iotlb_sync(domain, &iotlb_gather);
 510        iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
 511}
 512
 513static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
 514                size_t size, enum dma_data_direction dir,
 515                unsigned long attrs)
 516{
 517        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 518        phys_addr_t phys;
 519
 520        phys = iommu_iova_to_phys(domain, dma_addr);
 521        if (WARN_ON(!phys))
 522                return;
 523
 524        __iommu_dma_unmap(dev, dma_addr, size);
 525
 526        if (unlikely(is_swiotlb_buffer(dev, phys)))
 527                swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 528}
 529
 530static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 531                size_t size, int prot, u64 dma_mask)
 532{
 533        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 534        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 535        struct iova_domain *iovad = &cookie->iovad;
 536        size_t iova_off = iova_offset(iovad, phys);
 537        dma_addr_t iova;
 538
 539        if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
 540            iommu_deferred_attach(dev, domain))
 541                return DMA_MAPPING_ERROR;
 542
 543        size = iova_align(iovad, size + iova_off);
 544
 545        iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
 546        if (!iova)
 547                return DMA_MAPPING_ERROR;
 548
 549        if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
 550                iommu_dma_free_iova(cookie, iova, size, NULL);
 551                return DMA_MAPPING_ERROR;
 552        }
 553        return iova + iova_off;
 554}
 555
 556static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 557                size_t org_size, dma_addr_t dma_mask, bool coherent,
 558                enum dma_data_direction dir, unsigned long attrs)
 559{
 560        int prot = dma_info_to_prot(dir, coherent, attrs);
 561        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 562        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 563        struct iova_domain *iovad = &cookie->iovad;
 564        size_t aligned_size = org_size;
 565        void *padding_start;
 566        size_t padding_size;
 567        dma_addr_t iova;
 568
 569        /*
 570         * If both the physical buffer start address and size are
 571         * page aligned, we don't need to use a bounce page.
 572         */
 573        if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
 574            iova_offset(iovad, phys | org_size)) {
 575                aligned_size = iova_align(iovad, org_size);
 576                phys = swiotlb_tbl_map_single(dev, phys, org_size,
 577                                              aligned_size, dir, attrs);
 578
 579                if (phys == DMA_MAPPING_ERROR)
 580                        return DMA_MAPPING_ERROR;
 581
 582                /* Cleanup the padding area. */
 583                padding_start = phys_to_virt(phys);
 584                padding_size = aligned_size;
 585
 586                if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 587                    (dir == DMA_TO_DEVICE ||
 588                     dir == DMA_BIDIRECTIONAL)) {
 589                        padding_start += org_size;
 590                        padding_size -= org_size;
 591                }
 592
 593                memset(padding_start, 0, padding_size);
 594        }
 595
 596        iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
 597        if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
 598                swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
 599        return iova;
 600}
 601
 602static void __iommu_dma_free_pages(struct page **pages, int count)
 603{
 604        while (count--)
 605                __free_page(pages[count]);
 606        kvfree(pages);
 607}
 608
 609static struct page **__iommu_dma_alloc_pages(struct device *dev,
 610                unsigned int count, unsigned long order_mask, gfp_t gfp)
 611{
 612        struct page **pages;
 613        unsigned int i = 0, nid = dev_to_node(dev);
 614
 615        order_mask &= (2U << MAX_ORDER) - 1;
 616        if (!order_mask)
 617                return NULL;
 618
 619        pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL);
 620        if (!pages)
 621                return NULL;
 622
 623        /* IOMMU can map any pages, so himem can also be used here */
 624        gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
 625
 626        /* It makes no sense to muck about with huge pages */
 627        gfp &= ~__GFP_COMP;
 628
 629        while (count) {
 630                struct page *page = NULL;
 631                unsigned int order_size;
 632
 633                /*
 634                 * Higher-order allocations are a convenience rather
 635                 * than a necessity, hence using __GFP_NORETRY until
 636                 * falling back to minimum-order allocations.
 637                 */
 638                for (order_mask &= (2U << __fls(count)) - 1;
 639                     order_mask; order_mask &= ~order_size) {
 640                        unsigned int order = __fls(order_mask);
 641                        gfp_t alloc_flags = gfp;
 642
 643                        order_size = 1U << order;
 644                        if (order_mask > order_size)
 645                                alloc_flags |= __GFP_NORETRY;
 646                        page = alloc_pages_node(nid, alloc_flags, order);
 647                        if (!page)
 648                                continue;
 649                        if (order)
 650                                split_page(page, order);
 651                        break;
 652                }
 653                if (!page) {
 654                        __iommu_dma_free_pages(pages, i);
 655                        return NULL;
 656                }
 657                count -= order_size;
 658                while (order_size--)
 659                        pages[i++] = page++;
 660        }
 661        return pages;
 662}
 663
 664/*
 665 * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
 666 * but an IOMMU which supports smaller pages might not map the whole thing.
 667 */
 668static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
 669                size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
 670                unsigned long attrs)
 671{
 672        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 673        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 674        struct iova_domain *iovad = &cookie->iovad;
 675        bool coherent = dev_is_dma_coherent(dev);
 676        int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
 677        unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
 678        struct page **pages;
 679        dma_addr_t iova;
 680
 681        if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
 682            iommu_deferred_attach(dev, domain))
 683                return NULL;
 684
 685        min_size = alloc_sizes & -alloc_sizes;
 686        if (min_size < PAGE_SIZE) {
 687                min_size = PAGE_SIZE;
 688                alloc_sizes |= PAGE_SIZE;
 689        } else {
 690                size = ALIGN(size, min_size);
 691        }
 692        if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
 693                alloc_sizes = min_size;
 694
 695        count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 696        pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
 697                                        gfp);
 698        if (!pages)
 699                return NULL;
 700
 701        size = iova_align(iovad, size);
 702        iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
 703        if (!iova)
 704                goto out_free_pages;
 705
 706        if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
 707                goto out_free_iova;
 708
 709        if (!(ioprot & IOMMU_CACHE)) {
 710                struct scatterlist *sg;
 711                int i;
 712
 713                for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
 714                        arch_dma_prep_coherent(sg_page(sg), sg->length);
 715        }
 716
 717        if (iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot)
 718                        < size)
 719                goto out_free_sg;
 720
 721        sgt->sgl->dma_address = iova;
 722        sgt->sgl->dma_length = size;
 723        return pages;
 724
 725out_free_sg:
 726        sg_free_table(sgt);
 727out_free_iova:
 728        iommu_dma_free_iova(cookie, iova, size, NULL);
 729out_free_pages:
 730        __iommu_dma_free_pages(pages, count);
 731        return NULL;
 732}
 733
 734static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
 735                dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
 736                unsigned long attrs)
 737{
 738        struct page **pages;
 739        struct sg_table sgt;
 740        void *vaddr;
 741
 742        pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
 743                                                attrs);
 744        if (!pages)
 745                return NULL;
 746        *dma_handle = sgt.sgl->dma_address;
 747        sg_free_table(&sgt);
 748        vaddr = dma_common_pages_remap(pages, size, prot,
 749                        __builtin_return_address(0));
 750        if (!vaddr)
 751                goto out_unmap;
 752        return vaddr;
 753
 754out_unmap:
 755        __iommu_dma_unmap(dev, *dma_handle, size);
 756        __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
 757        return NULL;
 758}
 759
 760#ifdef CONFIG_DMA_REMAP
 761static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
 762                size_t size, enum dma_data_direction dir, gfp_t gfp,
 763                unsigned long attrs)
 764{
 765        struct dma_sgt_handle *sh;
 766
 767        sh = kmalloc(sizeof(*sh), gfp);
 768        if (!sh)
 769                return NULL;
 770
 771        sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
 772                                                    PAGE_KERNEL, attrs);
 773        if (!sh->pages) {
 774                kfree(sh);
 775                return NULL;
 776        }
 777        return &sh->sgt;
 778}
 779
 780static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
 781                struct sg_table *sgt, enum dma_data_direction dir)
 782{
 783        struct dma_sgt_handle *sh = sgt_handle(sgt);
 784
 785        __iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
 786        __iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
 787        sg_free_table(&sh->sgt);
 788        kfree(sh);
 789}
 790#endif /* CONFIG_DMA_REMAP */
 791
 792static void iommu_dma_sync_single_for_cpu(struct device *dev,
 793                dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
 794{
 795        phys_addr_t phys;
 796
 797        if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
 798                return;
 799
 800        phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
 801        if (!dev_is_dma_coherent(dev))
 802                arch_sync_dma_for_cpu(phys, size, dir);
 803
 804        if (is_swiotlb_buffer(dev, phys))
 805                swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 806}
 807
 808static void iommu_dma_sync_single_for_device(struct device *dev,
 809                dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
 810{
 811        phys_addr_t phys;
 812
 813        if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
 814                return;
 815
 816        phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
 817        if (is_swiotlb_buffer(dev, phys))
 818                swiotlb_sync_single_for_device(dev, phys, size, dir);
 819
 820        if (!dev_is_dma_coherent(dev))
 821                arch_sync_dma_for_device(phys, size, dir);
 822}
 823
 824static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 825                struct scatterlist *sgl, int nelems,
 826                enum dma_data_direction dir)
 827{
 828        struct scatterlist *sg;
 829        int i;
 830
 831        if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
 832                return;
 833
 834        for_each_sg(sgl, sg, nelems, i) {
 835                if (!dev_is_dma_coherent(dev))
 836                        arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 837
 838                if (is_swiotlb_buffer(dev, sg_phys(sg)))
 839                        swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
 840                                                    sg->length, dir);
 841        }
 842}
 843
 844static void iommu_dma_sync_sg_for_device(struct device *dev,
 845                struct scatterlist *sgl, int nelems,
 846                enum dma_data_direction dir)
 847{
 848        struct scatterlist *sg;
 849        int i;
 850
 851        if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
 852                return;
 853
 854        for_each_sg(sgl, sg, nelems, i) {
 855                if (is_swiotlb_buffer(dev, sg_phys(sg)))
 856                        swiotlb_sync_single_for_device(dev, sg_phys(sg),
 857                                                       sg->length, dir);
 858
 859                if (!dev_is_dma_coherent(dev))
 860                        arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
 861        }
 862}
 863
 864static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 865                unsigned long offset, size_t size, enum dma_data_direction dir,
 866                unsigned long attrs)
 867{
 868        phys_addr_t phys = page_to_phys(page) + offset;
 869        bool coherent = dev_is_dma_coherent(dev);
 870        dma_addr_t dma_handle;
 871
 872        dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
 873                        coherent, dir, attrs);
 874        if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 875            dma_handle != DMA_MAPPING_ERROR)
 876                arch_sync_dma_for_device(phys, size, dir);
 877        return dma_handle;
 878}
 879
 880static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 881                size_t size, enum dma_data_direction dir, unsigned long attrs)
 882{
 883        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 884                iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
 885        __iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
 886}
 887
 888/*
 889 * Prepare a successfully-mapped scatterlist to give back to the caller.
 890 *
 891 * At this point the segments are already laid out by iommu_dma_map_sg() to
 892 * avoid individually crossing any boundaries, so we merely need to check a
 893 * segment's start address to avoid concatenating across one.
 894 */
 895static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
 896                dma_addr_t dma_addr)
 897{
 898        struct scatterlist *s, *cur = sg;
 899        unsigned long seg_mask = dma_get_seg_boundary(dev);
 900        unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
 901        int i, count = 0;
 902
 903        for_each_sg(sg, s, nents, i) {
 904                /* Restore this segment's original unaligned fields first */
 905                unsigned int s_iova_off = sg_dma_address(s);
 906                unsigned int s_length = sg_dma_len(s);
 907                unsigned int s_iova_len = s->length;
 908
 909                s->offset += s_iova_off;
 910                s->length = s_length;
 911                sg_dma_address(s) = DMA_MAPPING_ERROR;
 912                sg_dma_len(s) = 0;
 913
 914                /*
 915                 * Now fill in the real DMA data. If...
 916                 * - there is a valid output segment to append to
 917                 * - and this segment starts on an IOVA page boundary
 918                 * - but doesn't fall at a segment boundary
 919                 * - and wouldn't make the resulting output segment too long
 920                 */
 921                if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
 922                    (max_len - cur_len >= s_length)) {
 923                        /* ...then concatenate it with the previous one */
 924                        cur_len += s_length;
 925                } else {
 926                        /* Otherwise start the next output segment */
 927                        if (i > 0)
 928                                cur = sg_next(cur);
 929                        cur_len = s_length;
 930                        count++;
 931
 932                        sg_dma_address(cur) = dma_addr + s_iova_off;
 933                }
 934
 935                sg_dma_len(cur) = cur_len;
 936                dma_addr += s_iova_len;
 937
 938                if (s_length + s_iova_off < s_iova_len)
 939                        cur_len = 0;
 940        }
 941        return count;
 942}
 943
 944/*
 945 * If mapping failed, then just restore the original list,
  946 * while making sure the DMA fields are invalidated.
 947 */
 948static void __invalidate_sg(struct scatterlist *sg, int nents)
 949{
 950        struct scatterlist *s;
 951        int i;
 952
 953        for_each_sg(sg, s, nents, i) {
 954                if (sg_dma_address(s) != DMA_MAPPING_ERROR)
 955                        s->offset += sg_dma_address(s);
 956                if (sg_dma_len(s))
 957                        s->length = sg_dma_len(s);
 958                sg_dma_address(s) = DMA_MAPPING_ERROR;
 959                sg_dma_len(s) = 0;
 960        }
 961}
 962
 963static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 964                int nents, enum dma_data_direction dir, unsigned long attrs)
 965{
 966        struct scatterlist *s;
 967        int i;
 968
 969        for_each_sg(sg, s, nents, i)
 970                __iommu_dma_unmap_swiotlb(dev, sg_dma_address(s),
 971                                sg_dma_len(s), dir, attrs);
 972}
 973
 974static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 975                int nents, enum dma_data_direction dir, unsigned long attrs)
 976{
 977        struct scatterlist *s;
 978        int i;
 979
 980        for_each_sg(sg, s, nents, i) {
 981                sg_dma_address(s) = __iommu_dma_map_swiotlb(dev, sg_phys(s),
 982                                s->length, dma_get_mask(dev),
 983                                dev_is_dma_coherent(dev), dir, attrs);
 984                if (sg_dma_address(s) == DMA_MAPPING_ERROR)
 985                        goto out_unmap;
 986                sg_dma_len(s) = s->length;
 987        }
 988
 989        return nents;
 990
 991out_unmap:
 992        iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
 993        return -EIO;
 994}
 995
 996/*
 997 * The DMA API client is passing in a scatterlist which could describe
 998 * any old buffer layout, but the IOMMU API requires everything to be
 999 * aligned to IOMMU pages. Hence the need for this complicated bit of
1000 * impedance-matching, to be able to hand off a suitably-aligned list,
1001 * but still preserve the original offsets and sizes for the caller.
1002 */
1003static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
1004                int nents, enum dma_data_direction dir, unsigned long attrs)
1005{
1006        struct iommu_domain *domain = iommu_get_dma_domain(dev);
1007        struct iommu_dma_cookie *cookie = domain->iova_cookie;
1008        struct iova_domain *iovad = &cookie->iovad;
1009        struct scatterlist *s, *prev = NULL;
1010        int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
1011        dma_addr_t iova;
1012        size_t iova_len = 0;
1013        unsigned long mask = dma_get_seg_boundary(dev);
1014        ssize_t ret;
1015        int i;
1016
1017        if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
1018                ret = iommu_deferred_attach(dev, domain);
1019                goto out;
1020        }
1021
1022        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1023                iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
1024
1025        if (dev_is_untrusted(dev))
1026                return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
1027
1028        /*
1029         * Work out how much IOVA space we need, and align the segments to
1030         * IOVA granules for the IOMMU driver to handle. With some clever
1031         * trickery we can modify the list in-place, but reversibly, by
1032         * stashing the unaligned parts in the as-yet-unused DMA fields.
1033         */
1034        for_each_sg(sg, s, nents, i) {
1035                size_t s_iova_off = iova_offset(iovad, s->offset);
1036                size_t s_length = s->length;
1037                size_t pad_len = (mask - iova_len + 1) & mask;
1038
1039                sg_dma_address(s) = s_iova_off;
1040                sg_dma_len(s) = s_length;
1041                s->offset -= s_iova_off;
1042                s_length = iova_align(iovad, s_length + s_iova_off);
1043                s->length = s_length;
1044
1045                /*
1046                 * Due to the alignment of our single IOVA allocation, we can
1047                 * depend on these assumptions about the segment boundary mask:
1048                 * - If mask size >= IOVA size, then the IOVA range cannot
1049                 *   possibly fall across a boundary, so we don't care.
1050                 * - If mask size < IOVA size, then the IOVA range must start
1051                 *   exactly on a boundary, therefore we can lay things out
1052                 *   based purely on segment lengths without needing to know
1053                 *   the actual addresses beforehand.
1054                 * - The mask must be a power of 2, so pad_len == 0 if
1055                 *   iova_len == 0, thus we cannot dereference prev the first
1056                 *   time through here (i.e. before it has a meaningful value).
1057                 */
1058                if (pad_len && pad_len < s_length - 1) {
1059                        prev->length += pad_len;
1060                        iova_len += pad_len;
1061                }
1062
1063                iova_len += s_length;
1064                prev = s;
1065        }
1066
1067        iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
1068        if (!iova) {
1069                ret = -ENOMEM;
1070                goto out_restore_sg;
1071        }
1072
1073        /*
1074         * We'll leave any physical concatenation to the IOMMU driver's
1075         * implementation - it knows better than we do.
1076         */
1077        ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
1078        if (ret < iova_len)
1079                goto out_free_iova;
1080
1081        return __finalise_sg(dev, sg, nents, iova);
1082
1083out_free_iova:
1084        iommu_dma_free_iova(cookie, iova, iova_len, NULL);
1085out_restore_sg:
1086        __invalidate_sg(sg, nents);
1087out:
1088        if (ret != -ENOMEM)
1089                return -EINVAL;
1090        return ret;
1091}
1092
1093static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
1094                int nents, enum dma_data_direction dir, unsigned long attrs)
1095{
1096        dma_addr_t start, end;
1097        struct scatterlist *tmp;
1098        int i;
1099
1100        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1101                iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
1102
1103        if (dev_is_untrusted(dev)) {
1104                iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
1105                return;
1106        }
1107
1108        /*
1109         * The scatterlist segments are mapped into a single
1110         * contiguous IOVA allocation, so this is incredibly easy.
1111         */
1112        start = sg_dma_address(sg);
1113        for_each_sg(sg_next(sg), tmp, nents - 1, i) {
1114                if (sg_dma_len(tmp) == 0)
1115                        break;
1116                sg = tmp;
1117        }
1118        end = sg_dma_address(sg) + sg_dma_len(sg);
1119        __iommu_dma_unmap(dev, start, end - start);
1120}
1121
1122static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
1123                size_t size, enum dma_data_direction dir, unsigned long attrs)
1124{
1125        return __iommu_dma_map(dev, phys, size,
1126                        dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
1127                        dma_get_mask(dev));
1128}
1129
1130static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
1131                size_t size, enum dma_data_direction dir, unsigned long attrs)
1132{
1133        __iommu_dma_unmap(dev, handle, size);
1134}
1135
1136static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
1137{
1138        size_t alloc_size = PAGE_ALIGN(size);
1139        int count = alloc_size >> PAGE_SHIFT;
1140        struct page *page = NULL, **pages = NULL;
1141
1142        /* Non-coherent atomic allocation? Easy */
1143        if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
1144            dma_free_from_pool(dev, cpu_addr, alloc_size))
1145                return;
1146
1147        if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
1148                /*
1149                 * If it the address is remapped, then it's either non-coherent
1150                 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
1151                 */
1152                pages = dma_common_find_pages(cpu_addr);
1153                if (!pages)
1154                        page = vmalloc_to_page(cpu_addr);
1155                dma_common_free_remap(cpu_addr, alloc_size);
1156        } else {
1157                /* Lowmem means a coherent atomic or CMA allocation */
1158                page = virt_to_page(cpu_addr);
1159        }
1160
1161        if (pages)
1162                __iommu_dma_free_pages(pages, count);
1163        if (page)
1164                dma_free_contiguous(dev, page, alloc_size);
1165}
1166
1167static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
1168                dma_addr_t handle, unsigned long attrs)
1169{
1170        __iommu_dma_unmap(dev, handle, size);
1171        __iommu_dma_free(dev, size, cpu_addr);
1172}
1173
1174static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
1175                struct page **pagep, gfp_t gfp, unsigned long attrs)
1176{
1177        bool coherent = dev_is_dma_coherent(dev);
1178        size_t alloc_size = PAGE_ALIGN(size);
1179        int node = dev_to_node(dev);
1180        struct page *page = NULL;
1181        void *cpu_addr;
1182
1183        page = dma_alloc_contiguous(dev, alloc_size, gfp);
1184        if (!page)
1185                page = alloc_pages_node(node, gfp, get_order(alloc_size));
1186        if (!page)
1187                return NULL;
1188
1189        if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) {
1190                pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
1191
1192                cpu_addr = dma_common_contiguous_remap(page, alloc_size,
1193                                prot, __builtin_return_address(0));
1194                if (!cpu_addr)
1195                        goto out_free_pages;
1196
1197                if (!coherent)
1198                        arch_dma_prep_coherent(page, size);
1199        } else {
1200                cpu_addr = page_address(page);
1201        }
1202
1203        *pagep = page;
1204        memset(cpu_addr, 0, alloc_size);
1205        return cpu_addr;
1206out_free_pages:
1207        dma_free_contiguous(dev, page, alloc_size);
1208        return NULL;
1209}
1210
1211static void *iommu_dma_alloc(struct device *dev, size_t size,
1212                dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
1213{
1214        bool coherent = dev_is_dma_coherent(dev);
1215        int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
1216        struct page *page = NULL;
1217        void *cpu_addr;
1218
1219        gfp |= __GFP_ZERO;
1220
1221        if (IS_ENABLED(CONFIG_DMA_REMAP) && gfpflags_allow_blocking(gfp) &&
1222            !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
1223                return iommu_dma_alloc_remap(dev, size, handle, gfp,
1224                                dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
1225        }
1226
1227        if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
1228            !gfpflags_allow_blocking(gfp) && !coherent)
1229                page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
1230                                               gfp, NULL);
1231        else
1232                cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
1233        if (!cpu_addr)
1234                return NULL;
1235
1236        *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
1237                        dev->coherent_dma_mask);
1238        if (*handle == DMA_MAPPING_ERROR) {
1239                __iommu_dma_free(dev, size, cpu_addr);
1240                return NULL;
1241        }
1242
1243        return cpu_addr;
1244}
1245
1246static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
1247                void *cpu_addr, dma_addr_t dma_addr, size_t size,
1248                unsigned long attrs)
1249{
1250        unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
1251        unsigned long pfn, off = vma->vm_pgoff;
1252        int ret;
1253
1254        vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
1255
1256        if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
1257                return ret;
1258
1259        if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
1260                return -ENXIO;
1261
1262        if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
1263                struct page **pages = dma_common_find_pages(cpu_addr);
1264
1265                if (pages)
1266                        return vm_map_pages(vma, pages, nr_pages);
1267                pfn = vmalloc_to_pfn(cpu_addr);
1268        } else {
1269                pfn = page_to_pfn(virt_to_page(cpu_addr));
1270        }
1271
1272        return remap_pfn_range(vma, vma->vm_start, pfn + off,
1273                               vma->vm_end - vma->vm_start,
1274                               vma->vm_page_prot);
1275}
1276
1277static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
1278                void *cpu_addr, dma_addr_t dma_addr, size_t size,
1279                unsigned long attrs)
1280{
1281        struct page *page;
1282        int ret;
1283
1284        if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
1285                struct page **pages = dma_common_find_pages(cpu_addr);
1286
1287                if (pages) {
1288                        return sg_alloc_table_from_pages(sgt, pages,
1289                                        PAGE_ALIGN(size) >> PAGE_SHIFT,
1290                                        0, size, GFP_KERNEL);
1291                }
1292
1293                page = vmalloc_to_page(cpu_addr);
1294        } else {
1295                page = virt_to_page(cpu_addr);
1296        }
1297
1298        ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
1299        if (!ret)
1300                sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
1301        return ret;
1302}
1303
1304static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
1305{
1306        struct iommu_domain *domain = iommu_get_dma_domain(dev);
1307
1308        return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
1309}
1310
1311static const struct dma_map_ops iommu_dma_ops = {
1312        .alloc                  = iommu_dma_alloc,
1313        .free                   = iommu_dma_free,
1314        .alloc_pages            = dma_common_alloc_pages,
1315        .free_pages             = dma_common_free_pages,
1316#ifdef CONFIG_DMA_REMAP
1317        .alloc_noncontiguous    = iommu_dma_alloc_noncontiguous,
1318        .free_noncontiguous     = iommu_dma_free_noncontiguous,
1319#endif
1320        .mmap                   = iommu_dma_mmap,
1321        .get_sgtable            = iommu_dma_get_sgtable,
1322        .map_page               = iommu_dma_map_page,
1323        .unmap_page             = iommu_dma_unmap_page,
1324        .map_sg                 = iommu_dma_map_sg,
1325        .unmap_sg               = iommu_dma_unmap_sg,
1326        .sync_single_for_cpu    = iommu_dma_sync_single_for_cpu,
1327        .sync_single_for_device = iommu_dma_sync_single_for_device,
1328        .sync_sg_for_cpu        = iommu_dma_sync_sg_for_cpu,
1329        .sync_sg_for_device     = iommu_dma_sync_sg_for_device,
1330        .map_resource           = iommu_dma_map_resource,
1331        .unmap_resource         = iommu_dma_unmap_resource,
1332        .get_merge_boundary     = iommu_dma_get_merge_boundary,
1333};
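
/*
 * Drivers never call these ops directly; once iommu_setup_dma_ops() below has
 * installed iommu_dma_ops for a device, an ordinary DMA API call such as
 *
 *	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *
 * is routed through dma_map_page_attrs() into iommu_dma_map_page() above.
 */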
1334
1335/*
1336 * The IOMMU core code allocates the default DMA domain, which the underlying
1337 * IOMMU driver needs to support via the dma-iommu layer.
1338 */
1339void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
1340{
1341        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1342
1343        if (!domain)
1344                goto out_err;
1345
1350        if (iommu_is_dma_domain(domain)) {
1351                if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
1352                        goto out_err;
1353                dev->dma_ops = &iommu_dma_ops;
1354        }
1355
1356        return;
1357out_err:
1358         pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
1359                 dev_name(dev));
1360}
1361EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
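
/*
 * Illustrative call site (simplified from what arch code typically does):
 * arch_setup_dma_ops() hands over the usable IOVA window once the device's
 * IOMMU has probed, e.g.
 *
 *	if (iommu)
 *		iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
 *
 * note that @dma_limit is the last usable address, not the window size.
 */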
1362
1363static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
1364                phys_addr_t msi_addr, struct iommu_domain *domain)
1365{
1366        struct iommu_dma_cookie *cookie = domain->iova_cookie;
1367        struct iommu_dma_msi_page *msi_page;
1368        dma_addr_t iova;
1369        int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1370        size_t size = cookie_msi_granule(cookie);
1371
1372        msi_addr &= ~(phys_addr_t)(size - 1);
1373        list_for_each_entry(msi_page, &cookie->msi_page_list, list)
1374                if (msi_page->phys == msi_addr)
1375                        return msi_page;
1376
1377        msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
1378        if (!msi_page)
1379                return NULL;
1380
1381        iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
1382        if (!iova)
1383                goto out_free_page;
1384
1385        if (iommu_map(domain, iova, msi_addr, size, prot))
1386                goto out_free_iova;
1387
1388        INIT_LIST_HEAD(&msi_page->list);
1389        msi_page->phys = msi_addr;
1390        msi_page->iova = iova;
1391        list_add(&msi_page->list, &cookie->msi_page_list);
1392        return msi_page;
1393
1394out_free_iova:
1395        iommu_dma_free_iova(cookie, iova, size, NULL);
1396out_free_page:
1397        kfree(msi_page);
1398        return NULL;
1399}
1400
1401int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
1402{
1403        struct device *dev = msi_desc_to_dev(desc);
1404        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1405        struct iommu_dma_msi_page *msi_page;
1406        static DEFINE_MUTEX(msi_prepare_lock); /* see below */
1407
1408        if (!domain || !domain->iova_cookie) {
1409                desc->iommu_cookie = NULL;
1410                return 0;
1411        }
1412
1413        /*
1414         * In fact the whole prepare operation should already be serialised by
1415         * irq_domain_mutex further up the callchain, but that's pretty subtle
1416         * on its own, so consider this locking as failsafe documentation...
1417         */
1418        mutex_lock(&msi_prepare_lock);
1419        msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
1420        mutex_unlock(&msi_prepare_lock);
1421
1422        msi_desc_set_iommu_cookie(desc, msi_page);
1423
1424        if (!msi_page)
1425                return -ENOMEM;
1426        return 0;
1427}
1428
1429void iommu_dma_compose_msi_msg(struct msi_desc *desc,
1430                               struct msi_msg *msg)
1431{
1432        struct device *dev = msi_desc_to_dev(desc);
1433        const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1434        const struct iommu_dma_msi_page *msi_page;
1435
1436        msi_page = msi_desc_get_iommu_cookie(desc);
1437
1438        if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
1439                return;
1440
1441        msg->address_hi = upper_32_bits(msi_page->iova);
1442        msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
1443        msg->address_lo += lower_32_bits(msi_page->iova);
1444}
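
/*
 * Rough flow for the two MSI helpers above (simplified sketch; doorbell_phys
 * is illustrative): the MSI irqchip driver calls
 *
 *	err = iommu_dma_prepare_msi(desc, doorbell_phys);
 *
 * while allocating the interrupt, and its irq_compose_msi_msg callback ends
 * with
 *
 *	iommu_dma_compose_msi_msg(desc, msg);
 *
 * so that the physical doorbell address written by the irqchip is replaced
 * with the IOVA at which it was mapped for the device.
 */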
1445
1446static int iommu_dma_init(void)
1447{
1448        if (is_kdump_kernel())
1449                static_branch_enable(&iommu_deferred_attach_enabled);
1450
1451        return iova_cache_get();
1452}
1453arch_initcall(iommu_dma_init);
1454