linux/drivers/iommu/dma-iommu.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * A fairly generic DMA-API to IOMMU-API glue layer.
   4 *
   5 * Copyright (C) 2014-2015 ARM Ltd.
   6 *
   7 * based in part on arch/arm/mm/dma-mapping.c:
   8 * Copyright (C) 2000-2004 Russell King
   9 */
  10
  11#include <linux/acpi_iort.h>
  12#include <linux/atomic.h>
  13#include <linux/crash_dump.h>
  14#include <linux/device.h>
  15#include <linux/dma-direct.h>
  16#include <linux/dma-iommu.h>
  17#include <linux/dma-map-ops.h>
  18#include <linux/gfp.h>
  19#include <linux/huge_mm.h>
  20#include <linux/iommu.h>
  21#include <linux/iova.h>
  22#include <linux/irq.h>
  23#include <linux/list_sort.h>
  24#include <linux/mm.h>
  25#include <linux/mutex.h>
  26#include <linux/pci.h>
  27#include <linux/scatterlist.h>
  28#include <linux/spinlock.h>
  29#include <linux/swiotlb.h>
  30#include <linux/vmalloc.h>
  31
  32struct iommu_dma_msi_page {
  33        struct list_head        list;
  34        dma_addr_t              iova;
  35        phys_addr_t             phys;
  36};
  37
  38enum iommu_dma_cookie_type {
  39        IOMMU_DMA_IOVA_COOKIE,
  40        IOMMU_DMA_MSI_COOKIE,
  41};
  42
  43struct iommu_dma_cookie {
  44        enum iommu_dma_cookie_type      type;
  45        union {
  46                /* Full allocator for IOMMU_DMA_IOVA_COOKIE */
  47                struct {
  48                        struct iova_domain      iovad;
  49
  50                        struct iova_fq __percpu *fq;    /* Flush queue */
  51                        /* Number of TLB flushes that have been started */
  52                        atomic64_t              fq_flush_start_cnt;
  53                        /* Number of TLB flushes that have been finished */
  54                        atomic64_t              fq_flush_finish_cnt;
  55                        /* Timer to regularly empty the flush queues */
  56                        struct timer_list       fq_timer;
  57                        /* 1 when timer is active, 0 when not */
  58                        atomic_t                fq_timer_on;
  59                };
  60                /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
  61                dma_addr_t              msi_iova;
  62        };
  63        struct list_head                msi_page_list;
  64
  65        /* Domain for flush queue callback; NULL if flush queue not in use */
  66        struct iommu_domain             *fq_domain;
  67};
  68
  69static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
  70bool iommu_dma_forcedac __read_mostly;
  71
  72static int __init iommu_dma_forcedac_setup(char *str)
  73{
  74        int ret = kstrtobool(str, &iommu_dma_forcedac);
  75
  76        if (!ret && iommu_dma_forcedac)
  77                pr_info("Forcing DAC for PCI devices\n");
  78        return ret;
  79}
  80early_param("iommu.forcedac", iommu_dma_forcedac_setup);
  81
  82/* Number of entries per flush queue */
  83#define IOVA_FQ_SIZE    256
  84
  85/* Timeout (in ms) after which entries are flushed from the queue */
  86#define IOVA_FQ_TIMEOUT 10
  87
  88/* Flush queue entry for deferred flushing */
  89struct iova_fq_entry {
  90        unsigned long iova_pfn;
  91        unsigned long pages;
  92        struct list_head freelist;
  93        u64 counter; /* Flush counter when this entry was added */
  94};
  95
  96/* Per-CPU flush queue structure */
  97struct iova_fq {
  98        struct iova_fq_entry entries[IOVA_FQ_SIZE];
  99        unsigned int head, tail;
 100        spinlock_t lock;
 101};
 102
 103#define fq_ring_for_each(i, fq) \
 104        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
 105
 106static inline bool fq_full(struct iova_fq *fq)
 107{
 108        assert_spin_locked(&fq->lock);
 109        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
 110}
 111
 112static inline unsigned int fq_ring_add(struct iova_fq *fq)
 113{
 114        unsigned int idx = fq->tail;
 115
 116        assert_spin_locked(&fq->lock);
 117
 118        fq->tail = (idx + 1) % IOVA_FQ_SIZE;
 119
 120        return idx;
 121}
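
/*
 * Editorial example (not part of the upstream file): the ring indices wrap
 * modulo IOVA_FQ_SIZE, with one slot kept unused so that "full" and "empty"
 * can be told apart. With IOVA_FQ_SIZE == 256, head == 254 and tail == 2,
 * fq_ring_for_each() visits entries 254, 255, 0 and 1; fq_full() only
 * returns true once tail reaches 253, i.e. with 255 of the 256 slots in use.
 */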
 122
 123static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
 124{
 125        u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
 126        unsigned int idx;
 127
 128        assert_spin_locked(&fq->lock);
 129
 130        fq_ring_for_each(idx, fq) {
 131
 132                if (fq->entries[idx].counter >= counter)
 133                        break;
 134
 135                put_pages_list(&fq->entries[idx].freelist);
 136                free_iova_fast(&cookie->iovad,
 137                               fq->entries[idx].iova_pfn,
 138                               fq->entries[idx].pages);
 139
 140                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 141        }
 142}
 143
 144static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
 145{
 146        atomic64_inc(&cookie->fq_flush_start_cnt);
 147        cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain);
 148        atomic64_inc(&cookie->fq_flush_finish_cnt);
 149}
 150
 151static void fq_flush_timeout(struct timer_list *t)
 152{
 153        struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
 154        int cpu;
 155
 156        atomic_set(&cookie->fq_timer_on, 0);
 157        fq_flush_iotlb(cookie);
 158
 159        for_each_possible_cpu(cpu) {
 160                unsigned long flags;
 161                struct iova_fq *fq;
 162
 163                fq = per_cpu_ptr(cookie->fq, cpu);
 164                spin_lock_irqsave(&fq->lock, flags);
 165                fq_ring_free(cookie, fq);
 166                spin_unlock_irqrestore(&fq->lock, flags);
 167        }
 168}
 169
 170static void queue_iova(struct iommu_dma_cookie *cookie,
 171                unsigned long pfn, unsigned long pages,
 172                struct list_head *freelist)
 173{
 174        struct iova_fq *fq;
 175        unsigned long flags;
 176        unsigned int idx;
 177
 178        /*
 179         * Order against the IOMMU driver's pagetable update for the unmapped
 180         * PTEs, to guarantee that fq_flush_iotlb() observes that update if called
 181         * from a different CPU before we release the lock below. Full barrier
 182         * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
 183         * written fq state here.
 184         */
 185        smp_mb();
 186
 187        fq = raw_cpu_ptr(cookie->fq);
 188        spin_lock_irqsave(&fq->lock, flags);
 189
 190        /*
 191         * First remove all entries from the flush queue that have already been
 192         * flushed out on another CPU. This makes the fq_full() check below less
 193         * likely to be true.
 194         */
 195        fq_ring_free(cookie, fq);
 196
 197        if (fq_full(fq)) {
 198                fq_flush_iotlb(cookie);
 199                fq_ring_free(cookie, fq);
 200        }
 201
 202        idx = fq_ring_add(fq);
 203
 204        fq->entries[idx].iova_pfn = pfn;
 205        fq->entries[idx].pages    = pages;
 206        fq->entries[idx].counter  = atomic64_read(&cookie->fq_flush_start_cnt);
 207        list_splice(freelist, &fq->entries[idx].freelist);
 208
 209        spin_unlock_irqrestore(&fq->lock, flags);
 210
 211        /* Avoid false sharing as much as possible. */
 212        if (!atomic_read(&cookie->fq_timer_on) &&
 213            !atomic_xchg(&cookie->fq_timer_on, 1))
 214                mod_timer(&cookie->fq_timer,
 215                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
 216}
 217
 218static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
 219{
 220        int cpu, idx;
 221
 222        if (!cookie->fq)
 223                return;
 224
 225        del_timer_sync(&cookie->fq_timer);
 226        /* The IOVAs will be torn down separately, so just free our queued pages */
 227        for_each_possible_cpu(cpu) {
 228                struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
 229
 230                fq_ring_for_each(idx, fq)
 231                        put_pages_list(&fq->entries[idx].freelist);
 232        }
 233
 234        free_percpu(cookie->fq);
 235}
 236
 237/* sysfs updates are serialised by the mutex of the group owning @domain */
 238int iommu_dma_init_fq(struct iommu_domain *domain)
 239{
 240        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 241        struct iova_fq __percpu *queue;
 242        int i, cpu;
 243
 244        if (cookie->fq_domain)
 245                return 0;
 246
 247        atomic64_set(&cookie->fq_flush_start_cnt,  0);
 248        atomic64_set(&cookie->fq_flush_finish_cnt, 0);
 249
 250        queue = alloc_percpu(struct iova_fq);
 251        if (!queue) {
 252                pr_warn("iova flush queue initialization failed\n");
 253                return -ENOMEM;
 254        }
 255
 256        for_each_possible_cpu(cpu) {
 257                struct iova_fq *fq = per_cpu_ptr(queue, cpu);
 258
 259                fq->head = 0;
 260                fq->tail = 0;
 261
 262                spin_lock_init(&fq->lock);
 263
 264                for (i = 0; i < IOVA_FQ_SIZE; i++)
 265                        INIT_LIST_HEAD(&fq->entries[i].freelist);
 266        }
 267
 268        cookie->fq = queue;
 269
 270        timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
 271        atomic_set(&cookie->fq_timer_on, 0);
 272        /*
 273         * Prevent incomplete fq state being observable. Pairs with path from
 274         * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
 275         */
 276        smp_wmb();
 277        WRITE_ONCE(cookie->fq_domain, domain);
 278        return 0;
 279}
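
/*
 * Editorial sketch of the ordering contract above (illustrative only):
 *
 *	CPU0: iommu_dma_init_fq()          CPU1: __iommu_dma_unmap()
 *	  initialise per-CPU queues          queued = READ_ONCE(cookie->fq_domain)
 *	  cookie->fq = queue                 iommu_dma_free_iova()
 *	  smp_wmb()                            queue_iova()
 *	  WRITE_ONCE(cookie->fq_domain, ...)     smp_mb()
 *	                                         raw_cpu_ptr(cookie->fq)
 *
 * If CPU1 observes a non-NULL fq_domain, the smp_wmb()/smp_mb() pairing
 * guarantees it also observes the fully initialised flush-queue state that
 * was written before the WRITE_ONCE().
 */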
 280
 281static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
 282{
 283        if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
 284                return cookie->iovad.granule;
 285        return PAGE_SIZE;
 286}
 287
 288static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
 289{
 290        struct iommu_dma_cookie *cookie;
 291
 292        cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
 293        if (cookie) {
 294                INIT_LIST_HEAD(&cookie->msi_page_list);
 295                cookie->type = type;
 296        }
 297        return cookie;
 298}
 299
 300/**
 301 * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
 302 * @domain: IOMMU domain to prepare for DMA-API usage
 303 */
 304int iommu_get_dma_cookie(struct iommu_domain *domain)
 305{
 306        if (domain->iova_cookie)
 307                return -EEXIST;
 308
 309        domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
 310        if (!domain->iova_cookie)
 311                return -ENOMEM;
 312
 313        return 0;
 314}
 315
 316/**
 317 * iommu_get_msi_cookie - Acquire just MSI remapping resources
 318 * @domain: IOMMU domain to prepare
 319 * @base: Start address of IOVA region for MSI mappings
 320 *
 321 * Users who manage their own IOVA allocation and do not want DMA API support,
 322 * but would still like to take advantage of automatic MSI remapping, can use
 323 * this to initialise their own domain appropriately. Users should reserve a
 324 * contiguous IOVA region, starting at @base, large enough to accommodate the
 325 * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
 326 * used by the devices attached to @domain.
 327 */
 328int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
 329{
 330        struct iommu_dma_cookie *cookie;
 331
 332        if (domain->type != IOMMU_DOMAIN_UNMANAGED)
 333                return -EINVAL;
 334
 335        if (domain->iova_cookie)
 336                return -EEXIST;
 337
 338        cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
 339        if (!cookie)
 340                return -ENOMEM;
 341
 342        cookie->msi_iova = base;
 343        domain->iova_cookie = cookie;
 344        return 0;
 345}
 346EXPORT_SYMBOL(iommu_get_msi_cookie);
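
/*
 * Illustrative usage sketch (not taken from any in-tree caller): a driver
 * that manages its own unmanaged domain, VFIO-style, might reserve an IOVA
 * window for MSI doorbells and hand its base to the helper above.
 * MSI_IOVA_BASE and MSI_IOVA_LENGTH are hypothetical names for that window:
 *
 *	domain = iommu_domain_alloc(dev->bus);
 *	if (!domain)
 *		return -ENOMEM;
 *	ret = iommu_get_msi_cookie(domain, MSI_IOVA_BASE);
 *	if (ret)
 *		goto out_free_domain;
 *
 * The caller must then keep [MSI_IOVA_BASE, MSI_IOVA_BASE + MSI_IOVA_LENGTH)
 * out of its own allocator; iommu_dma_prepare_msi() will map doorbell pages
 * into that window on demand.
 */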
 347
 348/**
 349 * iommu_put_dma_cookie - Release a domain's DMA mapping resources
 350 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
 351 *          iommu_get_msi_cookie()
 352 */
 353void iommu_put_dma_cookie(struct iommu_domain *domain)
 354{
 355        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 356        struct iommu_dma_msi_page *msi, *tmp;
 357
 358        if (!cookie)
 359                return;
 360
 361        if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
 362                iommu_dma_free_fq(cookie);
 363                put_iova_domain(&cookie->iovad);
 364        }
 365
 366        list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
 367                list_del(&msi->list);
 368                kfree(msi);
 369        }
 370        kfree(cookie);
 371        domain->iova_cookie = NULL;
 372}
 373
 374/**
 375 * iommu_dma_get_resv_regions - Reserved region driver helper
 376 * @dev: Device from iommu_get_resv_regions()
 377 * @list: Reserved region list from iommu_get_resv_regions()
 378 *
 379 * IOMMU drivers can use this to implement their .get_resv_regions callback
 380 * for general non-IOMMU-specific reservations. Currently, this covers GICv3
 381 * ITS region reservation on ACPI-based ARM platforms that may require HW MSI
 382 * reservation.
 383 */
 384void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 385{
 386
 387        if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
 388                iort_iommu_msi_get_resv_regions(dev, list);
 389
 390}
 391EXPORT_SYMBOL(iommu_dma_get_resv_regions);
 392
 393static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
 394                phys_addr_t start, phys_addr_t end)
 395{
 396        struct iova_domain *iovad = &cookie->iovad;
 397        struct iommu_dma_msi_page *msi_page;
 398        int i, num_pages;
 399
 400        start -= iova_offset(iovad, start);
 401        num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
 402
 403        for (i = 0; i < num_pages; i++) {
 404                msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
 405                if (!msi_page)
 406                        return -ENOMEM;
 407
 408                msi_page->phys = start;
 409                msi_page->iova = start;
 410                INIT_LIST_HEAD(&msi_page->list);
 411                list_add(&msi_page->list, &cookie->msi_page_list);
 412                start += iovad->granule;
 413        }
 414
 415        return 0;
 416}
 417
 418static int iommu_dma_ranges_sort(void *priv, const struct list_head *a,
 419                const struct list_head *b)
 420{
 421        struct resource_entry *res_a = list_entry(a, typeof(*res_a), node);
 422        struct resource_entry *res_b = list_entry(b, typeof(*res_b), node);
 423
 424        return res_a->res->start > res_b->res->start;
 425}
 426
 427static int iova_reserve_pci_windows(struct pci_dev *dev,
 428                struct iova_domain *iovad)
 429{
 430        struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
 431        struct resource_entry *window;
 432        unsigned long lo, hi;
 433        phys_addr_t start = 0, end;
 434
 435        resource_list_for_each_entry(window, &bridge->windows) {
 436                if (resource_type(window->res) != IORESOURCE_MEM)
 437                        continue;
 438
 439                lo = iova_pfn(iovad, window->res->start - window->offset);
 440                hi = iova_pfn(iovad, window->res->end - window->offset);
 441                reserve_iova(iovad, lo, hi);
 442        }
 443
 444        /* Get reserved DMA windows from host bridge */
 445        list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort);
 446        resource_list_for_each_entry(window, &bridge->dma_ranges) {
 447                end = window->res->start - window->offset;
 448resv_iova:
 449                if (end > start) {
 450                        lo = iova_pfn(iovad, start);
 451                        hi = iova_pfn(iovad, end);
 452                        reserve_iova(iovad, lo, hi);
 453                } else if (end < start) {
 454                        /* DMA ranges should be non-overlapping */
 455                        dev_err(&dev->dev,
 456                                "Failed to reserve IOVA [%pa-%pa]\n",
 457                                &start, &end);
 458                        return -EINVAL;
 459                }
 460
 461                start = window->res->end - window->offset + 1;
 462                /* If window is last entry */
 463                if (window->node.next == &bridge->dma_ranges &&
 464                    end != ~(phys_addr_t)0) {
 465                        end = ~(phys_addr_t)0;
 466                        goto resv_iova;
 467                }
 468        }
 469
 470        return 0;
 471}
 472
 473static int iova_reserve_iommu_regions(struct device *dev,
 474                struct iommu_domain *domain)
 475{
 476        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 477        struct iova_domain *iovad = &cookie->iovad;
 478        struct iommu_resv_region *region;
 479        LIST_HEAD(resv_regions);
 480        int ret = 0;
 481
 482        if (dev_is_pci(dev)) {
 483                ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
 484                if (ret)
 485                        return ret;
 486        }
 487
 488        iommu_get_resv_regions(dev, &resv_regions);
 489        list_for_each_entry(region, &resv_regions, list) {
 490                unsigned long lo, hi;
 491
 492                /* We ARE the software that manages these! */
 493                if (region->type == IOMMU_RESV_SW_MSI)
 494                        continue;
 495
 496                lo = iova_pfn(iovad, region->start);
 497                hi = iova_pfn(iovad, region->start + region->length - 1);
 498                reserve_iova(iovad, lo, hi);
 499
 500                if (region->type == IOMMU_RESV_MSI)
 501                        ret = cookie_init_hw_msi_region(cookie, region->start,
 502                                        region->start + region->length);
 503                if (ret)
 504                        break;
 505        }
 506        iommu_put_resv_regions(dev, &resv_regions);
 507
 508        return ret;
 509}
 510
 511static bool dev_is_untrusted(struct device *dev)
 512{
 513        return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 514}
 515
 516static bool dev_use_swiotlb(struct device *dev)
 517{
 518        return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
 519}
 520
 521/**
 522 * iommu_dma_init_domain - Initialise a DMA mapping domain
 523 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
 524 * @base: IOVA at which the mappable address space starts
 525 * @limit: Last address of the IOVA space
 526 * @dev: Device the domain is being initialised for
 527 *
 528 * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
 529 * avoid rounding surprises. If necessary, we reserve the page at address 0
 530 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
 531 * any change which could make prior IOVAs invalid will fail.
 532 */
 533static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 534                                 dma_addr_t limit, struct device *dev)
 535{
 536        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 537        unsigned long order, base_pfn;
 538        struct iova_domain *iovad;
 539        int ret;
 540
 541        if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
 542                return -EINVAL;
 543
 544        iovad = &cookie->iovad;
 545
 546        /* Use the smallest supported page size for IOVA granularity */
 547        order = __ffs(domain->pgsize_bitmap);
 548        base_pfn = max_t(unsigned long, 1, base >> order);
 549
 550        /* Check the domain allows at least some access to the device... */
 551        if (domain->geometry.force_aperture) {
 552                if (base > domain->geometry.aperture_end ||
 553                    limit < domain->geometry.aperture_start) {
 554                        pr_warn("specified DMA range outside IOMMU capability\n");
 555                        return -EFAULT;
 556                }
 557                /* ...then finally give it a kicking to make sure it fits */
 558                base_pfn = max_t(unsigned long, base_pfn,
 559                                domain->geometry.aperture_start >> order);
 560        }
 561
 562        /* start_pfn is always nonzero for an already-initialised domain */
 563        if (iovad->start_pfn) {
 564                if (1UL << order != iovad->granule ||
 565                    base_pfn != iovad->start_pfn) {
 566                        pr_warn("Incompatible range for DMA domain\n");
 567                        return -EFAULT;
 568                }
 569
 570                return 0;
 571        }
 572
 573        init_iova_domain(iovad, 1UL << order, base_pfn);
 574        ret = iova_domain_init_rcaches(iovad);
 575        if (ret)
 576                return ret;
 577
 578        /* If the FQ fails we can simply fall back to strict mode */
 579        if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
 580                domain->type = IOMMU_DOMAIN_DMA;
 581
 582        return iova_reserve_iommu_regions(dev, domain);
 583}
 584
 585/**
 586 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
 587 *                    page flags.
 588 * @dir: Direction of DMA transfer
 589 * @coherent: Is the DMA master cache-coherent?
 590 * @attrs: DMA attributes for the mapping
 591 *
 592 * Return: corresponding IOMMU API page protection flags
 593 */
 594static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
 595                     unsigned long attrs)
 596{
 597        int prot = coherent ? IOMMU_CACHE : 0;
 598
 599        if (attrs & DMA_ATTR_PRIVILEGED)
 600                prot |= IOMMU_PRIV;
 601
 602        switch (dir) {
 603        case DMA_BIDIRECTIONAL:
 604                return prot | IOMMU_READ | IOMMU_WRITE;
 605        case DMA_TO_DEVICE:
 606                return prot | IOMMU_READ;
 607        case DMA_FROM_DEVICE:
 608                return prot | IOMMU_WRITE;
 609        default:
 610                return 0;
 611        }
 612}
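
/*
 * Editorial example: DMA_FROM_DEVICE on a cache-coherent device with no
 * special attributes yields IOMMU_WRITE | IOMMU_CACHE (the device writes
 * memory the CPU will read, and the mapping is marked coherent); the same
 * direction on a non-coherent device yields just IOMMU_WRITE.
 */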
 613
 614static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 615                size_t size, u64 dma_limit, struct device *dev)
 616{
 617        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 618        struct iova_domain *iovad = &cookie->iovad;
 619        unsigned long shift, iova_len, iova = 0;
 620
 621        if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
 622                cookie->msi_iova += size;
 623                return cookie->msi_iova - size;
 624        }
 625
 626        shift = iova_shift(iovad);
 627        iova_len = size >> shift;
 628
 629        dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
 630
 631        if (domain->geometry.force_aperture)
 632                dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
 633
 634        /* Try to get PCI devices a SAC address */
 635        if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
 636                iova = alloc_iova_fast(iovad, iova_len,
 637                                       DMA_BIT_MASK(32) >> shift, false);
 638
 639        if (!iova)
 640                iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
 641                                       true);
 642
 643        return (dma_addr_t)iova << shift;
 644}
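
/*
 * Editorial example of the SAC-first policy above: for a PCI device with a
 * 64-bit DMA mask, the first alloc_iova_fast() attempt is capped at the
 * 32-bit boundary so the device gets a single-address-cycle address when one
 * is available; only if that space is exhausted, or "iommu.forcedac" was set
 * on the command line, is the full mask used and a DAC address above 4GiB
 * handed out.
 */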
 645
 646static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 647                dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
 648{
 649        struct iova_domain *iovad = &cookie->iovad;
 650
 651        /* The MSI case is only ever cleaning up its most recent allocation */
 652        if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 653                cookie->msi_iova -= size;
 654        else if (gather && gather->queued)
 655                queue_iova(cookie, iova_pfn(iovad, iova),
 656                                size >> iova_shift(iovad),
 657                                &gather->freelist);
 658        else
 659                free_iova_fast(iovad, iova_pfn(iovad, iova),
 660                                size >> iova_shift(iovad));
 661}
 662
 663static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
 664                size_t size)
 665{
 666        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 667        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 668        struct iova_domain *iovad = &cookie->iovad;
 669        size_t iova_off = iova_offset(iovad, dma_addr);
 670        struct iommu_iotlb_gather iotlb_gather;
 671        size_t unmapped;
 672
 673        dma_addr -= iova_off;
 674        size = iova_align(iovad, size + iova_off);
 675        iommu_iotlb_gather_init(&iotlb_gather);
 676        iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
 677
 678        unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
 679        WARN_ON(unmapped != size);
 680
 681        if (!iotlb_gather.queued)
 682                iommu_iotlb_sync(domain, &iotlb_gather);
 683        iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
 684}
 685
 686static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 687                size_t size, int prot, u64 dma_mask)
 688{
 689        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 690        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 691        struct iova_domain *iovad = &cookie->iovad;
 692        size_t iova_off = iova_offset(iovad, phys);
 693        dma_addr_t iova;
 694
 695        if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
 696            iommu_deferred_attach(dev, domain))
 697                return DMA_MAPPING_ERROR;
 698
 699        size = iova_align(iovad, size + iova_off);
 700
 701        iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
 702        if (!iova)
 703                return DMA_MAPPING_ERROR;
 704
 705        if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
 706                iommu_dma_free_iova(cookie, iova, size, NULL);
 707                return DMA_MAPPING_ERROR;
 708        }
 709        return iova + iova_off;
 710}
 711
 712static void __iommu_dma_free_pages(struct page **pages, int count)
 713{
 714        while (count--)
 715                __free_page(pages[count]);
 716        kvfree(pages);
 717}
 718
 719static struct page **__iommu_dma_alloc_pages(struct device *dev,
 720                unsigned int count, unsigned long order_mask, gfp_t gfp)
 721{
 722        struct page **pages;
 723        unsigned int i = 0, nid = dev_to_node(dev);
 724
 725        order_mask &= (2U << MAX_ORDER) - 1;
 726        if (!order_mask)
 727                return NULL;
 728
 729        pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL);
 730        if (!pages)
 731                return NULL;
 732
 733        /* IOMMU can map any pages, so highmem can also be used here */
 734        gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
 735
 736        /* It makes no sense to muck about with huge pages */
 737        gfp &= ~__GFP_COMP;
 738
 739        while (count) {
 740                struct page *page = NULL;
 741                unsigned int order_size;
 742
 743                /*
 744                 * Higher-order allocations are a convenience rather
 745                 * than a necessity, hence using __GFP_NORETRY until
 746                 * falling back to minimum-order allocations.
 747                 */
 748                for (order_mask &= (2U << __fls(count)) - 1;
 749                     order_mask; order_mask &= ~order_size) {
 750                        unsigned int order = __fls(order_mask);
 751                        gfp_t alloc_flags = gfp;
 752
 753                        order_size = 1U << order;
 754                        if (order_mask > order_size)
 755                                alloc_flags |= __GFP_NORETRY;
 756                        page = alloc_pages_node(nid, alloc_flags, order);
 757                        if (!page)
 758                                continue;
 759                        if (order)
 760                                split_page(page, order);
 761                        break;
 762                }
 763                if (!page) {
 764                        __iommu_dma_free_pages(pages, i);
 765                        return NULL;
 766                }
 767                count -= order_size;
 768                while (order_size--)
 769                        pages[i++] = page++;
 770        }
 771        return pages;
 772}
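
/*
 * Editorial worked example (illustrative only): suppose count == 10 and
 * order_mask allows orders 0 and 3 (0b1001). __fls(10) == 3, so the mask is
 * kept as-is; the first pass attempts an order-3 allocation with
 * __GFP_NORETRY (a smaller order remains as fallback) and, on success,
 * split_page() turns it into 8 individually-tracked pages, leaving
 * count == 2. The next pass clamps the mask to orders <= __fls(2) == 1,
 * which here leaves only order 0, so the last two pages are allocated one
 * at a time.
 */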
 773
 774/*
 775 * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
 776 * but an IOMMU which supports smaller pages might not map the whole thing.
 777 */
 778static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
 779                size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
 780                unsigned long attrs)
 781{
 782        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 783        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 784        struct iova_domain *iovad = &cookie->iovad;
 785        bool coherent = dev_is_dma_coherent(dev);
 786        int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
 787        unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
 788        struct page **pages;
 789        dma_addr_t iova;
 790        ssize_t ret;
 791
 792        if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
 793            iommu_deferred_attach(dev, domain))
 794                return NULL;
 795
 796        min_size = alloc_sizes & -alloc_sizes;
 797        if (min_size < PAGE_SIZE) {
 798                min_size = PAGE_SIZE;
 799                alloc_sizes |= PAGE_SIZE;
 800        } else {
 801                size = ALIGN(size, min_size);
 802        }
 803        if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
 804                alloc_sizes = min_size;
 805
 806        count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 807        pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
 808                                        gfp);
 809        if (!pages)
 810                return NULL;
 811
 812        size = iova_align(iovad, size);
 813        iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
 814        if (!iova)
 815                goto out_free_pages;
 816
 817        if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
 818                goto out_free_iova;
 819
 820        if (!(ioprot & IOMMU_CACHE)) {
 821                struct scatterlist *sg;
 822                int i;
 823
 824                for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
 825                        arch_dma_prep_coherent(sg_page(sg), sg->length);
 826        }
 827
 828        ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
 829        if (ret < 0 || ret < size)
 830                goto out_free_sg;
 831
 832        sgt->sgl->dma_address = iova;
 833        sgt->sgl->dma_length = size;
 834        return pages;
 835
 836out_free_sg:
 837        sg_free_table(sgt);
 838out_free_iova:
 839        iommu_dma_free_iova(cookie, iova, size, NULL);
 840out_free_pages:
 841        __iommu_dma_free_pages(pages, count);
 842        return NULL;
 843}
 844
 845static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
 846                dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
 847                unsigned long attrs)
 848{
 849        struct page **pages;
 850        struct sg_table sgt;
 851        void *vaddr;
 852
 853        pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
 854                                                attrs);
 855        if (!pages)
 856                return NULL;
 857        *dma_handle = sgt.sgl->dma_address;
 858        sg_free_table(&sgt);
 859        vaddr = dma_common_pages_remap(pages, size, prot,
 860                        __builtin_return_address(0));
 861        if (!vaddr)
 862                goto out_unmap;
 863        return vaddr;
 864
 865out_unmap:
 866        __iommu_dma_unmap(dev, *dma_handle, size);
 867        __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
 868        return NULL;
 869}
 870
 871static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
 872                size_t size, enum dma_data_direction dir, gfp_t gfp,
 873                unsigned long attrs)
 874{
 875        struct dma_sgt_handle *sh;
 876
 877        sh = kmalloc(sizeof(*sh), gfp);
 878        if (!sh)
 879                return NULL;
 880
 881        sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
 882                                                    PAGE_KERNEL, attrs);
 883        if (!sh->pages) {
 884                kfree(sh);
 885                return NULL;
 886        }
 887        return &sh->sgt;
 888}
 889
 890static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
 891                struct sg_table *sgt, enum dma_data_direction dir)
 892{
 893        struct dma_sgt_handle *sh = sgt_handle(sgt);
 894
 895        __iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
 896        __iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
 897        sg_free_table(&sh->sgt);
 898        kfree(sh);
 899}
 900
 901static void iommu_dma_sync_single_for_cpu(struct device *dev,
 902                dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
 903{
 904        phys_addr_t phys;
 905
 906        if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
 907                return;
 908
 909        phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
 910        if (!dev_is_dma_coherent(dev))
 911                arch_sync_dma_for_cpu(phys, size, dir);
 912
 913        if (is_swiotlb_buffer(dev, phys))
 914                swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 915}
 916
 917static void iommu_dma_sync_single_for_device(struct device *dev,
 918                dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
 919{
 920        phys_addr_t phys;
 921
 922        if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
 923                return;
 924
 925        phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
 926        if (is_swiotlb_buffer(dev, phys))
 927                swiotlb_sync_single_for_device(dev, phys, size, dir);
 928
 929        if (!dev_is_dma_coherent(dev))
 930                arch_sync_dma_for_device(phys, size, dir);
 931}
 932
 933static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 934                struct scatterlist *sgl, int nelems,
 935                enum dma_data_direction dir)
 936{
 937        struct scatterlist *sg;
 938        int i;
 939
 940        if (dev_use_swiotlb(dev))
 941                for_each_sg(sgl, sg, nelems, i)
 942                        iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
 943                                                      sg->length, dir);
 944        else if (!dev_is_dma_coherent(dev))
 945                for_each_sg(sgl, sg, nelems, i)
 946                        arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 947}
 948
 949static void iommu_dma_sync_sg_for_device(struct device *dev,
 950                struct scatterlist *sgl, int nelems,
 951                enum dma_data_direction dir)
 952{
 953        struct scatterlist *sg;
 954        int i;
 955
 956        if (dev_use_swiotlb(dev))
 957                for_each_sg(sgl, sg, nelems, i)
 958                        iommu_dma_sync_single_for_device(dev,
 959                                                         sg_dma_address(sg),
 960                                                         sg->length, dir);
 961        else if (!dev_is_dma_coherent(dev))
 962                for_each_sg(sgl, sg, nelems, i)
 963                        arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
 964}
 965
 966static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 967                unsigned long offset, size_t size, enum dma_data_direction dir,
 968                unsigned long attrs)
 969{
 970        phys_addr_t phys = page_to_phys(page) + offset;
 971        bool coherent = dev_is_dma_coherent(dev);
 972        int prot = dma_info_to_prot(dir, coherent, attrs);
 973        struct iommu_domain *domain = iommu_get_dma_domain(dev);
 974        struct iommu_dma_cookie *cookie = domain->iova_cookie;
 975        struct iova_domain *iovad = &cookie->iovad;
 976        dma_addr_t iova, dma_mask = dma_get_mask(dev);
 977
 978        /*
 979         * If both the physical buffer start address and size are
 980         * page aligned, we don't need to use a bounce page.
 981         */
 982        if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
 983                void *padding_start;
 984                size_t padding_size, aligned_size;
 985
 986                if (!is_swiotlb_active(dev)) {
 987                        dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
 988                        return DMA_MAPPING_ERROR;
 989                }
 990
 991                aligned_size = iova_align(iovad, size);
 992                phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
 993                                              iova_mask(iovad), dir, attrs);
 994
 995                if (phys == DMA_MAPPING_ERROR)
 996                        return DMA_MAPPING_ERROR;
 997
 998                /* Cleanup the padding area. */
 999                padding_start = phys_to_virt(phys);
1000                padding_size = aligned_size;
1001
1002                if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
1003                    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
1004                        padding_start += size;
1005                        padding_size -= size;
1006                }
1007
1008                memset(padding_start, 0, padding_size);
1009        }
1010
1011        if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1012                arch_sync_dma_for_device(phys, size, dir);
1013
1014        iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
1015        if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
1016                swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
1017        return iova;
1018}
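
/*
 * Editorial example of the bounce path above (illustrative only): with a 4K
 * IOVA granule, an untrusted device mapping a 0x600-byte buffer is bounced
 * into a granule-aligned 0x1000-byte swiotlb slot. For DMA_TO_DEVICE or
 * DMA_BIDIRECTIONAL only the 0xa00 bytes of padding after the copied data
 * are zeroed; for DMA_FROM_DEVICE the whole 0x1000-byte slot is cleared so
 * stale data in the slot is never exposed.
 */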
1019
1020static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
1021                size_t size, enum dma_data_direction dir, unsigned long attrs)
1022{
1023        struct iommu_domain *domain = iommu_get_dma_domain(dev);
1024        phys_addr_t phys;
1025
1026        phys = iommu_iova_to_phys(domain, dma_handle);
1027        if (WARN_ON(!phys))
1028                return;
1029
1030        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
1031                arch_sync_dma_for_cpu(phys, size, dir);
1032
1033        __iommu_dma_unmap(dev, dma_handle, size);
1034
1035        if (unlikely(is_swiotlb_buffer(dev, phys)))
1036                swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
1037}
1038
1039/*
1040 * Prepare a successfully-mapped scatterlist to give back to the caller.
1041 *
1042 * At this point the segments are already laid out by iommu_dma_map_sg() to
1043 * avoid individually crossing any boundaries, so we merely need to check a
1044 * segment's start address to avoid concatenating across one.
1045 */
1046static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
1047                dma_addr_t dma_addr)
1048{
1049        struct scatterlist *s, *cur = sg;
1050        unsigned long seg_mask = dma_get_seg_boundary(dev);
1051        unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
1052        int i, count = 0;
1053
1054        for_each_sg(sg, s, nents, i) {
1055                /* Restore this segment's original unaligned fields first */
1056                unsigned int s_iova_off = sg_dma_address(s);
1057                unsigned int s_length = sg_dma_len(s);
1058                unsigned int s_iova_len = s->length;
1059
1060                s->offset += s_iova_off;
1061                s->length = s_length;
1062                sg_dma_address(s) = DMA_MAPPING_ERROR;
1063                sg_dma_len(s) = 0;
1064
1065                /*
1066                 * Now fill in the real DMA data. If...
1067                 * - there is a valid output segment to append to
1068                 * - and this segment starts on an IOVA page boundary
1069                 * - but doesn't fall at a segment boundary
1070                 * - and wouldn't make the resulting output segment too long
1071                 */
1072                if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
1073                    (max_len - cur_len >= s_length)) {
1074                        /* ...then concatenate it with the previous one */
1075                        cur_len += s_length;
1076                } else {
1077                        /* Otherwise start the next output segment */
1078                        if (i > 0)
1079                                cur = sg_next(cur);
1080                        cur_len = s_length;
1081                        count++;
1082
1083                        sg_dma_address(cur) = dma_addr + s_iova_off;
1084                }
1085
1086                sg_dma_len(cur) = cur_len;
1087                dma_addr += s_iova_len;
1088
1089                if (s_length + s_iova_off < s_iova_len)
1090                        cur_len = 0;
1091        }
1092        return count;
1093}
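
/*
 * Editorial example (illustrative only): two 0x1000-byte segments laid out
 * back-to-back in the IOVA range, both starting on an IOVA page boundary,
 * come out of the loop above as a single 0x2000-byte DMA segment, provided
 * the second one's dma_addr does not land on a segment-boundary multiple and
 * 0x2000 does not exceed dma_get_max_seg_size(); otherwise it starts a new
 * output segment.
 */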
1094
1095/*
1096 * If mapping failed, then just restore the original list,
1097 * but making sure the DMA fields are invalidated.
1098 */
1099static void __invalidate_sg(struct scatterlist *sg, int nents)
1100{
1101        struct scatterlist *s;
1102        int i;
1103
1104        for_each_sg(sg, s, nents, i) {
1105                if (sg_dma_address(s) != DMA_MAPPING_ERROR)
1106                        s->offset += sg_dma_address(s);
1107                if (sg_dma_len(s))
1108                        s->length = sg_dma_len(s);
1109                sg_dma_address(s) = DMA_MAPPING_ERROR;
1110                sg_dma_len(s) = 0;
1111        }
1112}
1113
1114static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
1115                int nents, enum dma_data_direction dir, unsigned long attrs)
1116{
1117        struct scatterlist *s;
1118        int i;
1119
1120        for_each_sg(sg, s, nents, i)
1121                iommu_dma_unmap_page(dev, sg_dma_address(s),
1122                                sg_dma_len(s), dir, attrs);
1123}
1124
1125static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
1126                int nents, enum dma_data_direction dir, unsigned long attrs)
1127{
1128        struct scatterlist *s;
1129        int i;
1130
1131        for_each_sg(sg, s, nents, i) {
1132                sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
1133                                s->offset, s->length, dir, attrs);
1134                if (sg_dma_address(s) == DMA_MAPPING_ERROR)
1135                        goto out_unmap;
1136                sg_dma_len(s) = s->length;
1137        }
1138
1139        return nents;
1140
1141out_unmap:
1142        iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
1143        return -EIO;
1144}
1145
1146/*
1147 * The DMA API client is passing in a scatterlist which could describe
1148 * any old buffer layout, but the IOMMU API requires everything to be
1149 * aligned to IOMMU pages. Hence the need for this complicated bit of
1150 * impedance-matching, to be able to hand off a suitably-aligned list,
1151 * but still preserve the original offsets and sizes for the caller.
1152 */
1153static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
1154                int nents, enum dma_data_direction dir, unsigned long attrs)
1155{
1156        struct iommu_domain *domain = iommu_get_dma_domain(dev);
1157        struct iommu_dma_cookie *cookie = domain->iova_cookie;
1158        struct iova_domain *iovad = &cookie->iovad;
1159        struct scatterlist *s, *prev = NULL;
1160        int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
1161        dma_addr_t iova;
1162        size_t iova_len = 0;
1163        unsigned long mask = dma_get_seg_boundary(dev);
1164        ssize_t ret;
1165        int i;
1166
1167        if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
1168                ret = iommu_deferred_attach(dev, domain);
1169                if (ret)
1170                        goto out;
1171        }
1172
1173        if (dev_use_swiotlb(dev))
1174                return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
1175
1176        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1177                iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
1178
1179        /*
1180         * Work out how much IOVA space we need, and align the segments to
1181         * IOVA granules for the IOMMU driver to handle. With some clever
1182         * trickery we can modify the list in-place, but reversibly, by
1183         * stashing the unaligned parts in the as-yet-unused DMA fields.
1184         */
1185        for_each_sg(sg, s, nents, i) {
1186                size_t s_iova_off = iova_offset(iovad, s->offset);
1187                size_t s_length = s->length;
1188                size_t pad_len = (mask - iova_len + 1) & mask;
1189
1190                sg_dma_address(s) = s_iova_off;
1191                sg_dma_len(s) = s_length;
1192                s->offset -= s_iova_off;
1193                s_length = iova_align(iovad, s_length + s_iova_off);
1194                s->length = s_length;
1195
1196                /*
1197                 * Due to the alignment of our single IOVA allocation, we can
1198                 * depend on these assumptions about the segment boundary mask:
1199                 * - If mask size >= IOVA size, then the IOVA range cannot
1200                 *   possibly fall across a boundary, so we don't care.
1201                 * - If mask size < IOVA size, then the IOVA range must start
1202                 *   exactly on a boundary, therefore we can lay things out
1203                 *   based purely on segment lengths without needing to know
1204                 *   the actual addresses beforehand.
1205                 * - The mask must be a power of 2, so pad_len == 0 if
1206                 *   iova_len == 0, thus we cannot dereference prev the first
1207                 *   time through here (i.e. before it has a meaningful value).
1208                 */
1209                if (pad_len && pad_len < s_length - 1) {
1210                        prev->length += pad_len;
1211                        iova_len += pad_len;
1212                }
1213
1214                iova_len += s_length;
1215                prev = s;
1216        }
1217
1218        iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
1219        if (!iova) {
1220                ret = -ENOMEM;
1221                goto out_restore_sg;
1222        }
1223
1224        /*
1225         * We'll leave any physical concatenation to the IOMMU driver's
1226         * implementation - it knows better than we do.
1227         */
1228        ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
1229        if (ret < 0 || ret < iova_len)
1230                goto out_free_iova;
1231
1232        return __finalise_sg(dev, sg, nents, iova);
1233
1234out_free_iova:
1235        iommu_dma_free_iova(cookie, iova, iova_len, NULL);
1236out_restore_sg:
1237        __invalidate_sg(sg, nents);
1238out:
1239        if (ret != -ENOMEM)
1240                return -EINVAL;
1241        return ret;
1242}
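
/*
 * Editorial worked example of the alignment trickery above (illustrative
 * only): with a 4K granule, a segment of length 0x1800 at page offset 0x800
 * is handed to the IOMMU driver as a 0x2000-byte, offset-0 segment, while
 * the original offset (0x800) and length (0x1800) are stashed in
 * sg_dma_address()/sg_dma_len(). __finalise_sg() later restores them and
 * reports the real bus address (IOVA + 0x800) back to the caller.
 */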
1243
1244static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
1245                int nents, enum dma_data_direction dir, unsigned long attrs)
1246{
1247        dma_addr_t start, end;
1248        struct scatterlist *tmp;
1249        int i;
1250
1251        if (dev_use_swiotlb(dev)) {
1252                iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
1253                return;
1254        }
1255
1256        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1257                iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
1258
1259        /*
1260         * The scatterlist segments are mapped into a single
1261         * contiguous IOVA allocation, so this is incredibly easy.
1262         */
1263        start = sg_dma_address(sg);
1264        for_each_sg(sg_next(sg), tmp, nents - 1, i) {
1265                if (sg_dma_len(tmp) == 0)
1266                        break;
1267                sg = tmp;
1268        }
1269        end = sg_dma_address(sg) + sg_dma_len(sg);
1270        __iommu_dma_unmap(dev, start, end - start);
1271}
1272
1273static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
1274                size_t size, enum dma_data_direction dir, unsigned long attrs)
1275{
1276        return __iommu_dma_map(dev, phys, size,
1277                        dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
1278                        dma_get_mask(dev));
1279}
1280
1281static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
1282                size_t size, enum dma_data_direction dir, unsigned long attrs)
1283{
1284        __iommu_dma_unmap(dev, handle, size);
1285}
1286
1287static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
1288{
1289        size_t alloc_size = PAGE_ALIGN(size);
1290        int count = alloc_size >> PAGE_SHIFT;
1291        struct page *page = NULL, **pages = NULL;
1292
1293        /* Non-coherent atomic allocation? Easy */
1294        if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
1295            dma_free_from_pool(dev, cpu_addr, alloc_size))
1296                return;
1297
1298        if (is_vmalloc_addr(cpu_addr)) {
1299                /*
1300                 * If the address is remapped, then it's either non-coherent
1301                 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
1302                 */
1303                pages = dma_common_find_pages(cpu_addr);
1304                if (!pages)
1305                        page = vmalloc_to_page(cpu_addr);
1306                dma_common_free_remap(cpu_addr, alloc_size);
1307        } else {
1308                /* Lowmem means a coherent atomic or CMA allocation */
1309                page = virt_to_page(cpu_addr);
1310        }
1311
1312        if (pages)
1313                __iommu_dma_free_pages(pages, count);
1314        if (page)
1315                dma_free_contiguous(dev, page, alloc_size);
1316}
1317
1318static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
1319                dma_addr_t handle, unsigned long attrs)
1320{
1321        __iommu_dma_unmap(dev, handle, size);
1322        __iommu_dma_free(dev, size, cpu_addr);
1323}
1324
1325static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
1326                struct page **pagep, gfp_t gfp, unsigned long attrs)
1327{
1328        bool coherent = dev_is_dma_coherent(dev);
1329        size_t alloc_size = PAGE_ALIGN(size);
1330        int node = dev_to_node(dev);
1331        struct page *page = NULL;
1332        void *cpu_addr;
1333
1334        page = dma_alloc_contiguous(dev, alloc_size, gfp);
1335        if (!page)
1336                page = alloc_pages_node(node, gfp, get_order(alloc_size));
1337        if (!page)
1338                return NULL;
1339
1340        if (!coherent || PageHighMem(page)) {
1341                pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
1342
1343                cpu_addr = dma_common_contiguous_remap(page, alloc_size,
1344                                prot, __builtin_return_address(0));
1345                if (!cpu_addr)
1346                        goto out_free_pages;
1347
1348                if (!coherent)
1349                        arch_dma_prep_coherent(page, size);
1350        } else {
1351                cpu_addr = page_address(page);
1352        }
1353
1354        *pagep = page;
1355        memset(cpu_addr, 0, alloc_size);
1356        return cpu_addr;
1357out_free_pages:
1358        dma_free_contiguous(dev, page, alloc_size);
1359        return NULL;
1360}
1361
1362static void *iommu_dma_alloc(struct device *dev, size_t size,
1363                dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
1364{
1365        bool coherent = dev_is_dma_coherent(dev);
1366        int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
1367        struct page *page = NULL;
1368        void *cpu_addr;
1369
1370        gfp |= __GFP_ZERO;
1371
1372        if (gfpflags_allow_blocking(gfp) &&
1373            !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
1374                return iommu_dma_alloc_remap(dev, size, handle, gfp,
1375                                dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
1376        }
1377
1378        if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
1379            !gfpflags_allow_blocking(gfp) && !coherent)
1380                page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
1381                                               gfp, NULL);
1382        else
1383                cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
1384        if (!cpu_addr)
1385                return NULL;
1386
1387        *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
1388                        dev->coherent_dma_mask);
1389        if (*handle == DMA_MAPPING_ERROR) {
1390                __iommu_dma_free(dev, size, cpu_addr);
1391                return NULL;
1392        }
1393
1394        return cpu_addr;
1395}
1396
1397static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
1398                void *cpu_addr, dma_addr_t dma_addr, size_t size,
1399                unsigned long attrs)
1400{
1401        unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
1402        unsigned long pfn, off = vma->vm_pgoff;
1403        int ret;
1404
1405        vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
1406
1407        if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
1408                return ret;
1409
1410        if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
1411                return -ENXIO;
1412
1413        if (is_vmalloc_addr(cpu_addr)) {
1414                struct page **pages = dma_common_find_pages(cpu_addr);
1415
1416                if (pages)
1417                        return vm_map_pages(vma, pages, nr_pages);
1418                pfn = vmalloc_to_pfn(cpu_addr);
1419        } else {
1420                pfn = page_to_pfn(virt_to_page(cpu_addr));
1421        }
1422
1423        return remap_pfn_range(vma, vma->vm_start, pfn + off,
1424                               vma->vm_end - vma->vm_start,
1425                               vma->vm_page_prot);
1426}
1427
1428static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
1429                void *cpu_addr, dma_addr_t dma_addr, size_t size,
1430                unsigned long attrs)
1431{
1432        struct page *page;
1433        int ret;
1434
1435        if (is_vmalloc_addr(cpu_addr)) {
1436                struct page **pages = dma_common_find_pages(cpu_addr);
1437
1438                if (pages) {
1439                        return sg_alloc_table_from_pages(sgt, pages,
1440                                        PAGE_ALIGN(size) >> PAGE_SHIFT,
1441                                        0, size, GFP_KERNEL);
1442                }
1443
1444                page = vmalloc_to_page(cpu_addr);
1445        } else {
1446                page = virt_to_page(cpu_addr);
1447        }
1448
1449        ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
1450        if (!ret)
1451                sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
1452        return ret;
1453}
1454
1455static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
1456{
1457        struct iommu_domain *domain = iommu_get_dma_domain(dev);
1458
1459        return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
1460}
1461
1462static const struct dma_map_ops iommu_dma_ops = {
1463        .alloc                  = iommu_dma_alloc,
1464        .free                   = iommu_dma_free,
1465        .alloc_pages            = dma_common_alloc_pages,
1466        .free_pages             = dma_common_free_pages,
1467        .alloc_noncontiguous    = iommu_dma_alloc_noncontiguous,
1468        .free_noncontiguous     = iommu_dma_free_noncontiguous,
1469        .mmap                   = iommu_dma_mmap,
1470        .get_sgtable            = iommu_dma_get_sgtable,
1471        .map_page               = iommu_dma_map_page,
1472        .unmap_page             = iommu_dma_unmap_page,
1473        .map_sg                 = iommu_dma_map_sg,
1474        .unmap_sg               = iommu_dma_unmap_sg,
1475        .sync_single_for_cpu    = iommu_dma_sync_single_for_cpu,
1476        .sync_single_for_device = iommu_dma_sync_single_for_device,
1477        .sync_sg_for_cpu        = iommu_dma_sync_sg_for_cpu,
1478        .sync_sg_for_device     = iommu_dma_sync_sg_for_device,
1479        .map_resource           = iommu_dma_map_resource,
1480        .unmap_resource         = iommu_dma_unmap_resource,
1481        .get_merge_boundary     = iommu_dma_get_merge_boundary,
1482};
1483
1484/*
1485 * The IOMMU core code allocates the default DMA domain, which the underlying
1486 * IOMMU driver needs to support via the dma-iommu layer.
1487 */
1488void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
1489{
1490        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1491
1492        if (!domain)
1493                goto out_err;
1494
1495        /*
1496         * The IOMMU core code allocates the default DMA domain, which the
1497         * underlying IOMMU driver needs to support via the dma-iommu layer.
1498         */
1499        if (iommu_is_dma_domain(domain)) {
1500                if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
1501                        goto out_err;
1502                dev->dma_ops = &iommu_dma_ops;
1503        }
1504
1505        return;
1506out_err:
1507         pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
1508                 dev_name(dev));
1509}
1510EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
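
/*
 * Illustrative note (not part of this file): architecture code is expected
 * to call this once the IOMMU driver has probed the device; on arm64, for
 * instance, arch_setup_dma_ops() ends up doing something along the lines of
 *
 *	iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
 *
 * where dma_base and size describe the bus DMA window for the device. The
 * exact call site is architecture-specific and shown here only as a sketch.
 */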
1511
1512static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
1513                phys_addr_t msi_addr, struct iommu_domain *domain)
1514{
1515        struct iommu_dma_cookie *cookie = domain->iova_cookie;
1516        struct iommu_dma_msi_page *msi_page;
1517        dma_addr_t iova;
1518        int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1519        size_t size = cookie_msi_granule(cookie);
1520
1521        msi_addr &= ~(phys_addr_t)(size - 1);
1522        list_for_each_entry(msi_page, &cookie->msi_page_list, list)
1523                if (msi_page->phys == msi_addr)
1524                        return msi_page;
1525
1526        msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
1527        if (!msi_page)
1528                return NULL;
1529
1530        iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
1531        if (!iova)
1532                goto out_free_page;
1533
1534        if (iommu_map(domain, iova, msi_addr, size, prot))
1535                goto out_free_iova;
1536
1537        INIT_LIST_HEAD(&msi_page->list);
1538        msi_page->phys = msi_addr;
1539        msi_page->iova = iova;
1540        list_add(&msi_page->list, &cookie->msi_page_list);
1541        return msi_page;
1542
1543out_free_iova:
1544        iommu_dma_free_iova(cookie, iova, size, NULL);
1545out_free_page:
1546        kfree(msi_page);
1547        return NULL;
1548}
1549
1550int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
1551{
1552        struct device *dev = msi_desc_to_dev(desc);
1553        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1554        struct iommu_dma_msi_page *msi_page;
1555        static DEFINE_MUTEX(msi_prepare_lock); /* see below */
1556
1557        if (!domain || !domain->iova_cookie) {
1558                desc->iommu_cookie = NULL;
1559                return 0;
1560        }
1561
1562        /*
1563         * In fact the whole prepare operation should already be serialised by
1564         * irq_domain_mutex further up the callchain, but that's pretty subtle
1565         * on its own, so consider this locking as failsafe documentation...
1566         */
1567        mutex_lock(&msi_prepare_lock);
1568        msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
1569        mutex_unlock(&msi_prepare_lock);
1570
1571        msi_desc_set_iommu_cookie(desc, msi_page);
1572
1573        if (!msi_page)
1574                return -ENOMEM;
1575        return 0;
1576}
1577
1578void iommu_dma_compose_msi_msg(struct msi_desc *desc,
1579                               struct msi_msg *msg)
1580{
1581        struct device *dev = msi_desc_to_dev(desc);
1582        const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1583        const struct iommu_dma_msi_page *msi_page;
1584
1585        msi_page = msi_desc_get_iommu_cookie(desc);
1586
1587        if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
1588                return;
1589
1590        msg->address_hi = upper_32_bits(msi_page->iova);
1591        msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
1592        msg->address_lo += lower_32_bits(msi_page->iova);
1593}
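
/*
 * Editorial example (illustrative only, hypothetical addresses): if the ITS
 * doorbell sits at physical 0x08020040 and the cookie granule is 4K,
 * iommu_dma_get_msi_page() maps the 0x08020000 page at some IOVA, say
 * 0xfff00000. iommu_dma_compose_msi_msg() then rewrites address_hi/lo to
 * describe 0xfff00040: the offset within the doorbell page is preserved
 * while the page base is replaced by its IOVA.
 */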
1594
1595static int iommu_dma_init(void)
1596{
1597        if (is_kdump_kernel())
1598                static_branch_enable(&iommu_deferred_attach_enabled);
1599
1600        return iova_cache_get();
1601}
1602arch_initcall(iommu_dma_init);
1603