linux/drivers/iommu/io-pgtable-arm.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic ARM page table allocator.
 *
 * Copyright (C) 2014 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#define pr_fmt(fmt)     "arm-lpae io-pgtable: " fmt

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "io-pgtable-arm.h"

#define ARM_LPAE_MAX_ADDR_BITS          52
#define ARM_LPAE_S2_MAX_CONCAT_PAGES    16
#define ARM_LPAE_MAX_LEVELS             4

/* Struct accessors */
#define io_pgtable_to_data(x)                                           \
        container_of((x), struct arm_lpae_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)                                       \
        io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * Calculate the right shift amount to get to the portion describing level l
 * in a virtual address mapped by the pagetable in d.
 */
#define ARM_LPAE_LVL_SHIFT(l,d)                                         \
        (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) +          \
        ilog2(sizeof(arm_lpae_iopte)))

#define ARM_LPAE_GRANULE(d)                                             \
        (sizeof(arm_lpae_iopte) << (d)->bits_per_level)
#define ARM_LPAE_PGD_SIZE(d)                                            \
        (sizeof(arm_lpae_iopte) << (d)->pgd_bits)

/*
 * Calculate the index at level l used to map virtual address a using the
 * pagetable in d.
 */
#define ARM_LPAE_PGD_IDX(l,d)                                           \
        ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)

#define ARM_LPAE_LVL_IDX(a,l,d)                                         \
        (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) &                        \
         ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))

/* Calculate the block/page mapping size at level l for pagetable in d. */
#define ARM_LPAE_BLOCK_SIZE(l,d)        (1ULL << ARM_LPAE_LVL_SHIFT(l,d))

/* Page table bits */
#define ARM_LPAE_PTE_TYPE_SHIFT         0
#define ARM_LPAE_PTE_TYPE_MASK          0x3

#define ARM_LPAE_PTE_TYPE_BLOCK         1
#define ARM_LPAE_PTE_TYPE_TABLE         3
#define ARM_LPAE_PTE_TYPE_PAGE          3

#define ARM_LPAE_PTE_ADDR_MASK          GENMASK_ULL(47,12)

#define ARM_LPAE_PTE_NSTABLE            (((arm_lpae_iopte)1) << 63)
#define ARM_LPAE_PTE_XN                 (((arm_lpae_iopte)3) << 53)
#define ARM_LPAE_PTE_AF                 (((arm_lpae_iopte)1) << 10)
#define ARM_LPAE_PTE_SH_NS              (((arm_lpae_iopte)0) << 8)
#define ARM_LPAE_PTE_SH_OS              (((arm_lpae_iopte)2) << 8)
#define ARM_LPAE_PTE_SH_IS              (((arm_lpae_iopte)3) << 8)
#define ARM_LPAE_PTE_NS                 (((arm_lpae_iopte)1) << 5)
#define ARM_LPAE_PTE_VALID              (((arm_lpae_iopte)1) << 0)

#define ARM_LPAE_PTE_ATTR_LO_MASK       (((arm_lpae_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
#define ARM_LPAE_PTE_ATTR_HI_MASK       (((arm_lpae_iopte)6) << 52)
#define ARM_LPAE_PTE_ATTR_MASK          (ARM_LPAE_PTE_ATTR_LO_MASK |    \
                                         ARM_LPAE_PTE_ATTR_HI_MASK)
/* Software bit for solving coherency races */
#define ARM_LPAE_PTE_SW_SYNC            (((arm_lpae_iopte)1) << 55)

/* Stage-1 PTE */
#define ARM_LPAE_PTE_AP_UNPRIV          (((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_AP_RDONLY          (((arm_lpae_iopte)2) << 6)
#define ARM_LPAE_PTE_ATTRINDX_SHIFT     2
#define ARM_LPAE_PTE_nG                 (((arm_lpae_iopte)1) << 11)

/* Stage-2 PTE */
#define ARM_LPAE_PTE_HAP_FAULT          (((arm_lpae_iopte)0) << 6)
#define ARM_LPAE_PTE_HAP_READ           (((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_HAP_WRITE          (((arm_lpae_iopte)2) << 6)
#define ARM_LPAE_PTE_MEMATTR_OIWB       (((arm_lpae_iopte)0xf) << 2)
#define ARM_LPAE_PTE_MEMATTR_NC         (((arm_lpae_iopte)0x5) << 2)
#define ARM_LPAE_PTE_MEMATTR_DEV        (((arm_lpae_iopte)0x1) << 2)

/* Register bits */
#define ARM_LPAE_VTCR_SL0_MASK          0x3

#define ARM_LPAE_TCR_T0SZ_SHIFT         0

#define ARM_LPAE_VTCR_PS_SHIFT          16
#define ARM_LPAE_VTCR_PS_MASK           0x7

#define ARM_LPAE_MAIR_ATTR_SHIFT(n)     ((n) << 3)
#define ARM_LPAE_MAIR_ATTR_MASK         0xff
#define ARM_LPAE_MAIR_ATTR_DEVICE       0x04
#define ARM_LPAE_MAIR_ATTR_NC           0x44
#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA   0xf4
#define ARM_LPAE_MAIR_ATTR_WBRWA        0xff
#define ARM_LPAE_MAIR_ATTR_IDX_NC       0
#define ARM_LPAE_MAIR_ATTR_IDX_CACHE    1
#define ARM_LPAE_MAIR_ATTR_IDX_DEV      2
#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE       3

#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
#define ARM_MALI_LPAE_TTBR_READ_INNER   BIT(2)
#define ARM_MALI_LPAE_TTBR_SHARE_OUTER  BIT(4)

#define ARM_MALI_LPAE_MEMATTR_IMP_DEF   0x88ULL
#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL

/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))

#define iopte_type(pte)                                 \
        (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)

#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK)

struct arm_lpae_io_pgtable {
        struct io_pgtable       iop;

        int                     pgd_bits;
        int                     start_level;
        int                     bits_per_level;

        void                    *pgd;
};

typedef u64 arm_lpae_iopte;

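/*
 * A valid leaf at the final lookup level uses the PAGE descriptor type;
 * everywhere else (and at all levels on Mali, which has no distinct page
 * type) a leaf is a BLOCK descriptor. TABLE and PAGE share the same
 * encoding, so the level is what disambiguates them.
 */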
static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
                              enum io_pgtable_fmt fmt)
{
        if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE)
                return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE;

        return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK;
}

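/*
 * With a 64K granule and 52-bit output addresses, PA bits 51:48 live in
 * descriptor bits 15:12; the two helpers below fold those bits down into
 * the descriptor and rotate them back out again.
 */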
static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
                                     struct arm_lpae_io_pgtable *data)
{
        arm_lpae_iopte pte = paddr;

        /* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
        return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
}

static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
                                  struct arm_lpae_io_pgtable *data)
{
        u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;

        if (ARM_LPAE_GRANULE(data) < SZ_64K)
                return paddr;

        /* Rotate the packed high-order bits back to the top */
        return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
}

static bool selftest_running = false;

static dma_addr_t __arm_lpae_dma_addr(void *pages)
{
        return (dma_addr_t)virt_to_phys(pages);
}

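/*
 * Allocate zeroed table memory on the IOMMU's NUMA node. For non-coherent
 * walkers the table is also DMA-mapped, and we insist on a 1:1 DMA address
 * so that PTEs can carry plain physical addresses.
 */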
static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
                                    struct io_pgtable_cfg *cfg)
{
        struct device *dev = cfg->iommu_dev;
        int order = get_order(size);
        struct page *p;
        dma_addr_t dma;
        void *pages;

        VM_BUG_ON((gfp & __GFP_HIGHMEM));
        p = alloc_pages_node(dev ? dev_to_node(dev) : NUMA_NO_NODE,
                             gfp | __GFP_ZERO, order);
        if (!p)
                return NULL;

        pages = page_address(p);
        if (!cfg->coherent_walk) {
                dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
                /*
                 * We depend on the IOMMU being able to work with any physical
                 * address directly, so if the DMA layer suggests otherwise by
                 * translating or truncating them, that bodes very badly...
                 */
                if (dma != virt_to_phys(pages))
                        goto out_unmap;
        }

        return pages;

out_unmap:
        dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
        dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
out_free:
        __free_pages(p, order);
        return NULL;
}

static void __arm_lpae_free_pages(void *pages, size_t size,
                                  struct io_pgtable_cfg *cfg)
{
        if (!cfg->coherent_walk)
                dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
                                 size, DMA_TO_DEVICE);
        free_pages((unsigned long)pages, get_order(size));
}

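/* Clean a PTE from the CPU caches so that a non-coherent walker sees it. */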
static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
                                struct io_pgtable_cfg *cfg)
{
        dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
                                   sizeof(*ptep), DMA_TO_DEVICE);
}

static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
                               struct io_pgtable_cfg *cfg)
{
        *ptep = pte;

        if (!cfg->coherent_walk)
                __arm_lpae_sync_pte(ptep, cfg);
}

static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                               struct iommu_iotlb_gather *gather,
                               unsigned long iova, size_t size, int lvl,
                               arm_lpae_iopte *ptep);

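/* Write a leaf descriptor: PAGE type at the final level, BLOCK elsewhere. */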
static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                                phys_addr_t paddr, arm_lpae_iopte prot,
                                int lvl, arm_lpae_iopte *ptep)
{
        arm_lpae_iopte pte = prot;

        if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
                pte |= ARM_LPAE_PTE_TYPE_PAGE;
        else
                pte |= ARM_LPAE_PTE_TYPE_BLOCK;

        pte |= paddr_to_iopte(paddr, data);

        __arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
}

static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                             unsigned long iova, phys_addr_t paddr,
                             arm_lpae_iopte prot, int lvl,
                             arm_lpae_iopte *ptep)
{
        arm_lpae_iopte pte = *ptep;

        if (iopte_leaf(pte, lvl, data->iop.fmt)) {
                /* We require an unmap first */
                WARN_ON(!selftest_running);
                return -EEXIST;
        } else if (iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE) {
                /*
                 * We need to unmap and free the old table before
                 * overwriting it with a block entry.
                 */
                arm_lpae_iopte *tblp;
                size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);

                tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
                if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
                        WARN_ON(1);
                        return -EINVAL;
                }
        }

        __arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
        return 0;
}

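/*
 * Atomically install a next-level table pointer. If we lose the cmpxchg
 * race, the winning entry is handed back to the caller; for non-coherent
 * walks, the SW_SYNC bit marks entries already cleaned to memory so the
 * sync is never repeated unnecessarily.
 */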
static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
                                             arm_lpae_iopte *ptep,
                                             arm_lpae_iopte curr,
                                             struct io_pgtable_cfg *cfg)
{
        arm_lpae_iopte old, new;

        new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
                new |= ARM_LPAE_PTE_NSTABLE;

        /*
         * Ensure the table itself is visible before its PTE can be.
         * Whilst we could get away with cmpxchg64_release below, this
         * doesn't have any ordering semantics when !CONFIG_SMP.
         */
        dma_wmb();

        old = cmpxchg64_relaxed(ptep, curr, new);

        if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
                return old;

        /* Even if it's not ours, there's no point waiting; just kick it */
        __arm_lpae_sync_pte(ptep, cfg);
        if (old == curr)
                WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);

        return old;
}

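/*
 * Recursive map worker: descend from level 'lvl', allocating intermediate
 * tables on demand, until the block size at the current level matches the
 * requested mapping size, then install the leaf there.
 */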
static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                          phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
                          int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
{
        arm_lpae_iopte *cptep, pte;
        size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
        size_t tblsz = ARM_LPAE_GRANULE(data);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;

        /* Find our entry at the current level */
        ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);

        /* If we can install a leaf entry at this level, then do so */
        if (size == block_size)
                return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);

        /* We can't allocate tables at the final level */
        if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
                return -EINVAL;

        /* Grab a pointer to the next level */
        pte = READ_ONCE(*ptep);
        if (!pte) {
                cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg);
                if (!cptep)
                        return -ENOMEM;

                pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
                if (pte)
                        __arm_lpae_free_pages(cptep, tblsz, cfg);
        } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
                __arm_lpae_sync_pte(ptep, cfg);
        }

        if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
                cptep = iopte_deref(pte, data);
        } else if (pte) {
                /* We require an unmap first */
                WARN_ON(!selftest_running);
                return -EEXIST;
        }

        /* Rinse, repeat */
        return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
}

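/*
 * Convert IOMMU_{READ,WRITE,CACHE,MMIO,NOEXEC,PRIV} into PTE bits: AP and
 * ATTRINDX for stage 1, HAP and MEMATTR for stage 2, plus shareability,
 * XN and AF as appropriate for the format.
 */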
static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
                                           int prot)
{
        arm_lpae_iopte pte;

        if (data->iop.fmt == ARM_64_LPAE_S1 ||
            data->iop.fmt == ARM_32_LPAE_S1) {
                pte = ARM_LPAE_PTE_nG;
                if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
                        pte |= ARM_LPAE_PTE_AP_RDONLY;
                if (!(prot & IOMMU_PRIV))
                        pte |= ARM_LPAE_PTE_AP_UNPRIV;
        } else {
                pte = ARM_LPAE_PTE_HAP_FAULT;
                if (prot & IOMMU_READ)
                        pte |= ARM_LPAE_PTE_HAP_READ;
                if (prot & IOMMU_WRITE)
                        pte |= ARM_LPAE_PTE_HAP_WRITE;
        }

        /*
         * Note that this logic is structured to accommodate Mali LPAE
         * having stage-1-like attributes but stage-2-like permissions.
         */
        if (data->iop.fmt == ARM_64_LPAE_S2 ||
            data->iop.fmt == ARM_32_LPAE_S2) {
                if (prot & IOMMU_MMIO)
                        pte |= ARM_LPAE_PTE_MEMATTR_DEV;
                else if (prot & IOMMU_CACHE)
                        pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
                else
                        pte |= ARM_LPAE_PTE_MEMATTR_NC;
        } else {
                if (prot & IOMMU_MMIO)
                        pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
                                << ARM_LPAE_PTE_ATTRINDX_SHIFT);
                else if (prot & IOMMU_CACHE)
                        pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
                                << ARM_LPAE_PTE_ATTRINDX_SHIFT);
        }

        /*
         * Also Mali has its own notions of shareability wherein its Inner
         * domain covers the cores within the GPU, and its Outer domain is
         * "outside the GPU" (i.e. either the Inner or System domain in CPU
         * terms, depending on coherency).
         */
        if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
                pte |= ARM_LPAE_PTE_SH_IS;
        else
                pte |= ARM_LPAE_PTE_SH_OS;

        if (prot & IOMMU_NOEXEC)
                pte |= ARM_LPAE_PTE_XN;

        if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
                pte |= ARM_LPAE_PTE_NS;

        if (data->iop.fmt != ARM_MALI_LPAE)
                pte |= ARM_LPAE_PTE_AF;

        return pte;
}

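/*
 * Top-level map entry point: validate that the size is a supported
 * page/block size and that the IOVA and PA fit within ias/oas before
 * descending from the start level.
 */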
static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
                        phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte *ptep = data->pgd;
        int ret, lvl = data->start_level;
        arm_lpae_iopte prot;
        long iaext = (s64)iova >> cfg->ias;

        if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
                return -EINVAL;

        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
                iaext = ~iaext;
        if (WARN_ON(iaext || paddr >> cfg->oas))
                return -ERANGE;

        /* If no access, then nothing to do */
        if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
                return 0;

        prot = arm_lpae_prot_to_pte(data, iommu_prot);
        ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
        /*
         * Synchronise all PTE updates for the new mapping before there's
         * a chance for anything to kick off a table walk for the new iova.
         */
        wmb();

        return ret;
}

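/*
 * Recursively free a table and everything it points to, stopping the
 * descent at leaf entries and at the final level.
 */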
static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
                                    arm_lpae_iopte *ptep)
{
        arm_lpae_iopte *start, *end;
        unsigned long table_size;

        if (lvl == data->start_level)
                table_size = ARM_LPAE_PGD_SIZE(data);
        else
                table_size = ARM_LPAE_GRANULE(data);

        start = ptep;

        /* Only leaf entries at the last level */
        if (lvl == ARM_LPAE_MAX_LEVELS - 1)
                end = ptep;
        else
                end = (void *)ptep + table_size;

        while (ptep != end) {
                arm_lpae_iopte pte = *ptep++;

                if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
                        continue;

                __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
        }

        __arm_lpae_free_pages(start, table_size, &data->iop.cfg);
}

static void arm_lpae_free_pgtable(struct io_pgtable *iop)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);

        __arm_lpae_free_pgtable(data, data->start_level, data->pgd);
        kfree(data);
}

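/*
 * Unmap a sub-region of an existing block mapping: build a next-level
 * table covering the block (minus the part being unmapped) and swap it
 * in for the block entry, retrying against concurrent unmappers.
 */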
static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                                       struct iommu_iotlb_gather *gather,
                                       unsigned long iova, size_t size,
                                       arm_lpae_iopte blk_pte, int lvl,
                                       arm_lpae_iopte *ptep)
{
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte pte, *tablep;
        phys_addr_t blk_paddr;
        size_t tablesz = ARM_LPAE_GRANULE(data);
        size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
        int i, unmap_idx = -1;

        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;

        tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg);
        if (!tablep)
                return 0; /* Bytes unmapped */

        if (size == split_sz)
                unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);

        blk_paddr = iopte_to_paddr(blk_pte, data);
        pte = iopte_prot(blk_pte);

        for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
                /* Unmap! */
                if (i == unmap_idx)
                        continue;

                __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
        }

        pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
        if (pte != blk_pte) {
                __arm_lpae_free_pages(tablep, tablesz, cfg);
                /*
                 * We may race against someone unmapping another part of this
                 * block, but anything else is invalid. We can't misinterpret
                 * a page entry here since we're never at the last level.
                 */
                if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
                        return 0;

                tablep = iopte_deref(pte, data);
        } else if (unmap_idx >= 0) {
                io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
                return size;
        }

        return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
}

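/*
 * Recursive unmap worker: clear the entry once the size matches the block
 * size at this level, splitting blocks or recursing into tables otherwise.
 * Returns the number of bytes unmapped, or 0 on failure.
 */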
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                               struct iommu_iotlb_gather *gather,
                               unsigned long iova, size_t size, int lvl,
                               arm_lpae_iopte *ptep)
{
        arm_lpae_iopte pte;
        struct io_pgtable *iop = &data->iop;

        /* Something went horribly wrong and we ran out of page table */
        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;

        ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
        pte = READ_ONCE(*ptep);
        if (WARN_ON(!pte))
                return 0;

        /* If the size matches this level, we're in the right place */
        if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
                __arm_lpae_set_pte(ptep, 0, &iop->cfg);

                if (!iopte_leaf(pte, lvl, iop->fmt)) {
                        /* Also flush any partial walks */
                        io_pgtable_tlb_flush_walk(iop, iova, size,
                                                  ARM_LPAE_GRANULE(data));
                        ptep = iopte_deref(pte, data);
                        __arm_lpae_free_pgtable(data, lvl + 1, ptep);
                } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
                        /*
                         * Order the PTE update against queueing the IOVA, to
                         * guarantee that a flush callback from a different CPU
                         * has observed it before the TLBIALL can be issued.
                         */
                        smp_wmb();
                } else {
                        io_pgtable_tlb_add_page(iop, gather, iova, size);
                }

                return size;
        } else if (iopte_leaf(pte, lvl, iop->fmt)) {
                /*
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
                return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
                                                lvl + 1, ptep);
        }

        /* Keep on walkin' */
        ptep = iopte_deref(pte, data);
        return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
}

static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
                             size_t size, struct iommu_iotlb_gather *gather)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte *ptep = data->pgd;
        long iaext = (s64)iova >> cfg->ias;

        if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
                return 0;

        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
                iaext = ~iaext;
        if (WARN_ON(iaext))
                return 0;

        return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep);
}

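/* Walk the tables in software, mirroring what the hardware walker does. */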
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
                                         unsigned long iova)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        arm_lpae_iopte pte, *ptep = data->pgd;
        int lvl = data->start_level;

        do {
                /* Valid IOPTE pointer? */
                if (!ptep)
                        return 0;

                /* Grab the IOPTE we're interested in */
                ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
                pte = READ_ONCE(*ptep);

                /* Valid entry? */
                if (!pte)
                        return 0;

                /* Leaf entry? */
                if (iopte_leaf(pte, lvl, data->iop.fmt))
                        goto found_translation;

                /* Take it to the next level */
                ptep = iopte_deref(pte, data);
        } while (++lvl < ARM_LPAE_MAX_LEVELS);

        /* Ran out of page tables to walk */
        return 0;

found_translation:
        iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
        return iopte_to_paddr(pte, data) | iova;
}

static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
{
        unsigned long granule, page_sizes;
        unsigned int max_addr_bits = 48;

        /*
         * We need to restrict the supported page sizes to match the
         * translation regime for a particular granule. Aim to match
         * the CPU page size if possible, otherwise prefer smaller sizes.
         * While we're at it, restrict the block sizes to match the
         * chosen granule.
         */
        if (cfg->pgsize_bitmap & PAGE_SIZE)
                granule = PAGE_SIZE;
        else if (cfg->pgsize_bitmap & ~PAGE_MASK)
                granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK);
        else if (cfg->pgsize_bitmap & PAGE_MASK)
                granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK);
        else
                granule = 0;

        switch (granule) {
        case SZ_4K:
                page_sizes = (SZ_4K | SZ_2M | SZ_1G);
                break;
        case SZ_16K:
                page_sizes = (SZ_16K | SZ_32M);
                break;
        case SZ_64K:
                max_addr_bits = 52;
                page_sizes = (SZ_64K | SZ_512M);
                if (cfg->oas > 48)
                        page_sizes |= 1ULL << 42; /* 4TB */
                break;
        default:
                page_sizes = 0;
        }

        cfg->pgsize_bitmap &= page_sizes;
        cfg->ias = min(cfg->ias, max_addr_bits);
        cfg->oas = min(cfg->oas, max_addr_bits);
}

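/*
 * Common allocation: derive the table geometry from the configuration.
 * For example, with a 4K granule, bits_per_level = 12 - ilog2(8) = 9, so
 * a 48-bit IAS gives va_bits = 36, levels = DIV_ROUND_UP(36, 9) = 4,
 * start_level = 0 and pgd_bits = 36 - 9 * 3 = 9 (a single 4K pgd).
 */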
static struct arm_lpae_io_pgtable *
arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
{
        struct arm_lpae_io_pgtable *data;
        int levels, va_bits, pg_shift;

        arm_lpae_restrict_pgsizes(cfg);

        if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K)))
                return NULL;

        if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS)
                return NULL;

        if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
                return NULL;

        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return NULL;

        pg_shift = __ffs(cfg->pgsize_bitmap);
        data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));

        va_bits = cfg->ias - pg_shift;
        levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
        data->start_level = ARM_LPAE_MAX_LEVELS - levels;

        /* Calculate the actual size of our pgd (without concatenation) */
        data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));

        data->iop.ops = (struct io_pgtable_ops) {
                .map            = arm_lpae_map,
                .unmap          = arm_lpae_unmap,
                .iova_to_phys   = arm_lpae_iova_to_phys,
        };

        return data;
}

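/* Stage-1 allocation: fill in the TCR, MAIR and TTBR values for the caller. */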
static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
        u64 reg;
        struct arm_lpae_io_pgtable *data;
        typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
        bool tg1;

        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_NON_STRICT |
                            IO_PGTABLE_QUIRK_ARM_TTBR1 |
                            IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
                return NULL;

        data = arm_lpae_alloc_pgtable(cfg);
        if (!data)
                return NULL;

        /* TCR */
        if (cfg->coherent_walk) {
                tcr->sh = ARM_LPAE_TCR_SH_IS;
                tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
                tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
                if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
                        goto out_free_data;
        } else {
                tcr->sh = ARM_LPAE_TCR_SH_OS;
                tcr->irgn = ARM_LPAE_TCR_RGN_NC;
                if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
                        tcr->orgn = ARM_LPAE_TCR_RGN_NC;
                else
                        tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
        }

        tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
        switch (ARM_LPAE_GRANULE(data)) {
        case SZ_4K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
                break;
        case SZ_16K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
                break;
        case SZ_64K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
                break;
        }

        switch (cfg->oas) {
        case 32:
                tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
                break;
        case 36:
                tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
                break;
        case 40:
                tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
                break;
        case 42:
                tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
                break;
        case 44:
                tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
                break;
        case 48:
                tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
                break;
        case 52:
                tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
                break;
        default:
                goto out_free_data;
        }

        tcr->tsz = 64ULL - cfg->ias;

        /* MAIRs */
        reg = (ARM_LPAE_MAIR_ATTR_NC
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
              (ARM_LPAE_MAIR_ATTR_WBRWA
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
              (ARM_LPAE_MAIR_ATTR_DEVICE
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
              (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));

        cfg->arm_lpae_s1_cfg.mair = reg;

        /* Looking good; allocate a pgd */
        data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
                                           GFP_KERNEL, cfg);
        if (!data->pgd)
                goto out_free_data;

        /* Ensure the empty pgd is visible before any actual TTBR write */
        wmb();

        /* TTBR */
        cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
        return &data->iop;

out_free_data:
        kfree(data);
        return NULL;
}

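/*
 * Stage-2 allocation: as above, but for the VTCR/VTTBR, with up to 16
 * concatenated pgd pages at the initial level to shorten the walk.
 */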
static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
        u64 sl;
        struct arm_lpae_io_pgtable *data;
        typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;

        /* The NS quirk doesn't apply at stage 2 */
        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT))
                return NULL;

        data = arm_lpae_alloc_pgtable(cfg);
        if (!data)
                return NULL;

        /*
         * Concatenate PGDs at level 1 if possible in order to reduce
         * the depth of the stage-2 walk.
         */
        if (data->start_level == 0) {
                unsigned long pgd_pages;

                pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
                if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
                        data->pgd_bits += data->bits_per_level;
                        data->start_level++;
                }
        }

        /* VTCR */
        if (cfg->coherent_walk) {
                vtcr->sh = ARM_LPAE_TCR_SH_IS;
                vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
                vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
        } else {
                vtcr->sh = ARM_LPAE_TCR_SH_OS;
                vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
                vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
        }

        sl = data->start_level;

        switch (ARM_LPAE_GRANULE(data)) {
        case SZ_4K:
                vtcr->tg = ARM_LPAE_TCR_TG0_4K;
                sl++; /* SL0 format is different for 4K granule size */
                break;
        case SZ_16K:
                vtcr->tg = ARM_LPAE_TCR_TG0_16K;
                break;
        case SZ_64K:
                vtcr->tg = ARM_LPAE_TCR_TG0_64K;
                break;
        }

        switch (cfg->oas) {
        case 32:
                vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
                break;
        case 36:
                vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
                break;
        case 40:
                vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
                break;
        case 42:
                vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
                break;
        case 44:
                vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
                break;
        case 48:
                vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
                break;
        case 52:
                vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
                break;
        default:
                goto out_free_data;
        }

        vtcr->tsz = 64ULL - cfg->ias;
        vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;

        /* Allocate pgd pages */
        data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
                                           GFP_KERNEL, cfg);
        if (!data->pgd)
                goto out_free_data;

        /* Ensure the empty pgd is visible before any actual TTBR write */
        wmb();

        /* VTTBR */
        cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
        return &data->iop;

out_free_data:
        kfree(data);
        return NULL;
}

static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
        if (cfg->ias > 32 || cfg->oas > 40)
                return NULL;

        cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
        return arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
}

static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
        if (cfg->ias > 40 || cfg->oas > 40)
                return NULL;

        cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
        return arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
}

static struct io_pgtable *
arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
        struct arm_lpae_io_pgtable *data;

        /* No quirks for Mali (hopefully) */
        if (cfg->quirks)
                return NULL;

        if (cfg->ias > 48 || cfg->oas > 40)
                return NULL;

        cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);

        data = arm_lpae_alloc_pgtable(cfg);
        if (!data)
                return NULL;

        /* Mali seems to need a full 4-level table regardless of IAS */
        if (data->start_level > 0) {
                data->start_level = 0;
                data->pgd_bits = 0;
        }
        /*
         * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
         * best we can do is mimic the out-of-tree driver and hope that the
         * "implementation-defined caching policy" is good enough. Similarly,
         * we'll use it for the sake of a valid attribute for our 'device'
         * index, although callers should never request that in practice.
         */
        cfg->arm_mali_lpae_cfg.memattr =
                (ARM_MALI_LPAE_MEMATTR_IMP_DEF
                 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
                (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
                 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
                (ARM_MALI_LPAE_MEMATTR_IMP_DEF
                 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));

        data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
                                           cfg);
        if (!data->pgd)
                goto out_free_data;

        /* Ensure the empty pgd is visible before TRANSTAB can be written */
        wmb();

        cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
                                          ARM_MALI_LPAE_TTBR_READ_INNER |
                                          ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
        if (cfg->coherent_walk)
                cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER;

        return &data->iop;

out_free_data:
        kfree(data);
        return NULL;
}

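/* Hooks through which the io-pgtable core allocates and frees each format */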
struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
        .alloc  = arm_64_lpae_alloc_pgtable_s1,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
        .alloc  = arm_64_lpae_alloc_pgtable_s2,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
        .alloc  = arm_32_lpae_alloc_pgtable_s1,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
        .alloc  = arm_32_lpae_alloc_pgtable_s2,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
        .alloc  = arm_mali_lpae_alloc_pgtable,
        .free   = arm_lpae_free_pgtable,
};

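/*
 * Built-in self test: exercise map, unmap and iova_to_phys against dummy
 * TLB ops for each supported format, page-size bitmap and IAS.
 */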
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST

static struct io_pgtable_cfg *cfg_cookie __initdata;

static void __init dummy_tlb_flush_all(void *cookie)
{
        WARN_ON(cookie != cfg_cookie);
}

static void __init dummy_tlb_flush(unsigned long iova, size_t size,
                                   size_t granule, void *cookie)
{
        WARN_ON(cookie != cfg_cookie);
        WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}

static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
                                      unsigned long iova, size_t granule,
                                      void *cookie)
{
        dummy_tlb_flush(iova, granule, granule, cookie);
}

static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
        .tlb_flush_all  = dummy_tlb_flush_all,
        .tlb_flush_walk = dummy_tlb_flush,
        .tlb_add_page   = dummy_tlb_add_page,
};

static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;

        pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
                cfg->pgsize_bitmap, cfg->ias);
        pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
                ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
                ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
}

#define __FAIL(ops, i)  ({                                              \
                WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \
                arm_lpae_dump_ops(ops);                                 \
                selftest_running = false;                               \
                -EFAULT;                                                \
})

static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
{
        static const enum io_pgtable_fmt fmts[] __initconst = {
                ARM_64_LPAE_S1,
                ARM_64_LPAE_S2,
        };

        int i, j;
        unsigned long iova;
        size_t size;
        struct io_pgtable_ops *ops;

        selftest_running = true;

        for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
                cfg_cookie = cfg;
                ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
                if (!ops) {
                        pr_err("selftest: failed to allocate io pgtable ops\n");
                        return -ENOMEM;
                }

                /*
                 * Initial sanity checks.
                 * Empty page tables shouldn't provide any translations.
                 */
                if (ops->iova_to_phys(ops, 42))
                        return __FAIL(ops, i);

                if (ops->iova_to_phys(ops, SZ_1G + 42))
                        return __FAIL(ops, i);

                if (ops->iova_to_phys(ops, SZ_2G + 42))
                        return __FAIL(ops, i);

                /*
                 * Distinct mappings of different granule sizes.
                 */
                iova = 0;
                for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
                        size = 1UL << j;

                        if (ops->map(ops, iova, iova, size, IOMMU_READ |
                                                            IOMMU_WRITE |
                                                            IOMMU_NOEXEC |
                                                            IOMMU_CACHE, GFP_KERNEL))
                                return __FAIL(ops, i);

                        /* Overlapping mappings */
                        if (!ops->map(ops, iova, iova + size, size,
                                      IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL))
                                return __FAIL(ops, i);

                        if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
                                return __FAIL(ops, i);

                        iova += SZ_1G;
                }

                /* Partial unmap */
                size = 1UL << __ffs(cfg->pgsize_bitmap);
                if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
                        return __FAIL(ops, i);

                /* Remap of partial unmap */
                if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ, GFP_KERNEL))
                        return __FAIL(ops, i);

                if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
                        return __FAIL(ops, i);

                /* Full unmap */
                iova = 0;
                for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
                        size = 1UL << j;

                        if (ops->unmap(ops, iova, size, NULL) != size)
                                return __FAIL(ops, i);

                        if (ops->iova_to_phys(ops, iova + 42))
                                return __FAIL(ops, i);

                        /* Remap full block */
                        if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL))
                                return __FAIL(ops, i);

                        if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
                                return __FAIL(ops, i);

                        iova += SZ_1G;
                }

                free_io_pgtable_ops(ops);
        }

        selftest_running = false;
        return 0;
}

static int __init arm_lpae_do_selftests(void)
{
        static const unsigned long pgsize[] __initconst = {
                SZ_4K | SZ_2M | SZ_1G,
                SZ_16K | SZ_32M,
                SZ_64K | SZ_512M,
        };

        static const unsigned int ias[] __initconst = {
                32, 36, 40, 42, 44, 48,
        };

        int i, j, pass = 0, fail = 0;
        struct io_pgtable_cfg cfg = {
                .tlb = &dummy_tlb_ops,
                .oas = 48,
                .coherent_walk = true,
        };

        for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
                for (j = 0; j < ARRAY_SIZE(ias); ++j) {
                        cfg.pgsize_bitmap = pgsize[i];
                        cfg.ias = ias[j];
                        pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
                                pgsize[i], ias[j]);
                        if (arm_lpae_run_tests(&cfg))
                                fail++;
                        else
                                pass++;
                }
        }

        pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
        return fail ? -EFAULT : 0;
}
subsys_initcall(arm_lpae_do_selftests);
#endif