linux/drivers/iommu/io-pgtable-arm.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic ARM page table allocator.
 *
 * Copyright (C) 2014 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#define pr_fmt(fmt)     "arm-lpae io-pgtable: " fmt

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "io-pgtable-arm.h"

#define ARM_LPAE_MAX_ADDR_BITS          52
#define ARM_LPAE_S2_MAX_CONCAT_PAGES    16
#define ARM_LPAE_MAX_LEVELS             4

/* Struct accessors */
#define io_pgtable_to_data(x)                                           \
        container_of((x), struct arm_lpae_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)                                       \
        io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * Calculate the right shift amount to get to the portion describing level l
 * in a virtual address mapped by the pagetable in d.
 */
#define ARM_LPAE_LVL_SHIFT(l,d)                                         \
        (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) +          \
        ilog2(sizeof(arm_lpae_iopte)))

#define ARM_LPAE_GRANULE(d)                                             \
        (sizeof(arm_lpae_iopte) << (d)->bits_per_level)
#define ARM_LPAE_PGD_SIZE(d)                                            \
        (sizeof(arm_lpae_iopte) << (d)->pgd_bits)

#define ARM_LPAE_PTES_PER_TABLE(d)                                      \
        (ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))

/*
 * Calculate the index at level l used to map virtual address a using the
 * pagetable in d.
 */
#define ARM_LPAE_PGD_IDX(l,d)                                           \
        ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)

#define ARM_LPAE_LVL_IDX(a,l,d)                                         \
        (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) &                        \
         ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))

/* Calculate the block/page mapping size at level l for pagetable in d. */
#define ARM_LPAE_BLOCK_SIZE(l,d)        (1ULL << ARM_LPAE_LVL_SHIFT(l,d))
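
/*
 * As a worked example, assume a 4K granule and 8-byte PTEs, so that
 * bits_per_level = 9: ARM_LPAE_LVL_SHIFT() evaluates to 39/30/21/12 for
 * levels 0-3, ARM_LPAE_LVL_IDX() then extracts 9 bits of the IOVA at each
 * of those shifts, and ARM_LPAE_BLOCK_SIZE() yields 512G/1G/2M/4K - which
 * is why the 4K regime advertises SZ_4K | SZ_2M | SZ_1G in
 * arm_lpae_restrict_pgsizes() below.
 */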

/* Page table bits */
#define ARM_LPAE_PTE_TYPE_SHIFT         0
#define ARM_LPAE_PTE_TYPE_MASK          0x3

#define ARM_LPAE_PTE_TYPE_BLOCK         1
#define ARM_LPAE_PTE_TYPE_TABLE         3
#define ARM_LPAE_PTE_TYPE_PAGE          3

#define ARM_LPAE_PTE_ADDR_MASK          GENMASK_ULL(47,12)

#define ARM_LPAE_PTE_NSTABLE            (((arm_lpae_iopte)1) << 63)
#define ARM_LPAE_PTE_XN                 (((arm_lpae_iopte)3) << 53)
#define ARM_LPAE_PTE_AF                 (((arm_lpae_iopte)1) << 10)
#define ARM_LPAE_PTE_SH_NS              (((arm_lpae_iopte)0) << 8)
#define ARM_LPAE_PTE_SH_OS              (((arm_lpae_iopte)2) << 8)
#define ARM_LPAE_PTE_SH_IS              (((arm_lpae_iopte)3) << 8)
#define ARM_LPAE_PTE_NS                 (((arm_lpae_iopte)1) << 5)
#define ARM_LPAE_PTE_VALID              (((arm_lpae_iopte)1) << 0)

#define ARM_LPAE_PTE_ATTR_LO_MASK       (((arm_lpae_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
#define ARM_LPAE_PTE_ATTR_HI_MASK       (((arm_lpae_iopte)6) << 52)
#define ARM_LPAE_PTE_ATTR_MASK          (ARM_LPAE_PTE_ATTR_LO_MASK |    \
                                         ARM_LPAE_PTE_ATTR_HI_MASK)
/* Software bit for solving coherency races */
#define ARM_LPAE_PTE_SW_SYNC            (((arm_lpae_iopte)1) << 55)

/* Stage-1 PTE */
#define ARM_LPAE_PTE_AP_UNPRIV          (((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_AP_RDONLY          (((arm_lpae_iopte)2) << 6)
#define ARM_LPAE_PTE_ATTRINDX_SHIFT     2
#define ARM_LPAE_PTE_nG                 (((arm_lpae_iopte)1) << 11)

/* Stage-2 PTE */
#define ARM_LPAE_PTE_HAP_FAULT          (((arm_lpae_iopte)0) << 6)
#define ARM_LPAE_PTE_HAP_READ           (((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_HAP_WRITE          (((arm_lpae_iopte)2) << 6)
#define ARM_LPAE_PTE_MEMATTR_OIWB       (((arm_lpae_iopte)0xf) << 2)
#define ARM_LPAE_PTE_MEMATTR_NC         (((arm_lpae_iopte)0x5) << 2)
#define ARM_LPAE_PTE_MEMATTR_DEV        (((arm_lpae_iopte)0x1) << 2)

/* Register bits */
#define ARM_LPAE_VTCR_SL0_MASK          0x3

#define ARM_LPAE_TCR_T0SZ_SHIFT         0

#define ARM_LPAE_VTCR_PS_SHIFT          16
#define ARM_LPAE_VTCR_PS_MASK           0x7

#define ARM_LPAE_MAIR_ATTR_SHIFT(n)     ((n) << 3)
#define ARM_LPAE_MAIR_ATTR_MASK         0xff
#define ARM_LPAE_MAIR_ATTR_DEVICE       0x04
#define ARM_LPAE_MAIR_ATTR_NC           0x44
#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA   0xf4
#define ARM_LPAE_MAIR_ATTR_WBRWA        0xff
#define ARM_LPAE_MAIR_ATTR_IDX_NC       0
#define ARM_LPAE_MAIR_ATTR_IDX_CACHE    1
#define ARM_LPAE_MAIR_ATTR_IDX_DEV      2
#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE       3

#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
#define ARM_MALI_LPAE_TTBR_READ_INNER   BIT(2)
#define ARM_MALI_LPAE_TTBR_SHARE_OUTER  BIT(4)

#define ARM_MALI_LPAE_MEMATTR_IMP_DEF   0x88ULL
#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL

#define APPLE_DART_PTE_PROT_NO_WRITE (1<<7)
#define APPLE_DART_PTE_PROT_NO_READ (1<<8)

/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))

#define iopte_type(pte)                                 \
        (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)

#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK)

struct arm_lpae_io_pgtable {
        struct io_pgtable       iop;

        int                     pgd_bits;
        int                     start_level;
        int                     bits_per_level;

        void                    *pgd;
};

typedef u64 arm_lpae_iopte;

static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
                              enum io_pgtable_fmt fmt)
{
        if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE)
                return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE;

        return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK;
}

static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
                                     struct arm_lpae_io_pgtable *data)
{
        arm_lpae_iopte pte = paddr;

        /* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
        return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
}

static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
                                  struct arm_lpae_io_pgtable *data)
{
        u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;

        if (ARM_LPAE_GRANULE(data) < SZ_64K)
                return paddr;

        /* Rotate the packed high-order bits back to the top */
        return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
}
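
/*
 * With a 64K granule, PA bits 51:48 are packed into PTE bits 15:12 (which
 * would otherwise be RES0), so the two helpers above must round-trip. A
 * minimal sketch of that invariant, assuming 'data' describes a
 * 64K-granule table (illustrative only, hence not compiled):
 */
#if 0
static void arm_lpae_addr_roundtrip_check(struct arm_lpae_io_pgtable *data)
{
        phys_addr_t paddr = 0x000f0000a0000000ULL;      /* uses bits 51:48 */
        arm_lpae_iopte pte = paddr_to_iopte(paddr, data);

        WARN_ON(iopte_to_paddr(pte, data) != paddr);
}
#endif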

static bool selftest_running = false;

static dma_addr_t __arm_lpae_dma_addr(void *pages)
{
        return (dma_addr_t)virt_to_phys(pages);
}

static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
                                    struct io_pgtable_cfg *cfg)
{
        struct device *dev = cfg->iommu_dev;
        int order = get_order(size);
        struct page *p;
        dma_addr_t dma;
        void *pages;

        VM_BUG_ON((gfp & __GFP_HIGHMEM));
        p = alloc_pages_node(dev ? dev_to_node(dev) : NUMA_NO_NODE,
                             gfp | __GFP_ZERO, order);
        if (!p)
                return NULL;

        pages = page_address(p);
        if (!cfg->coherent_walk) {
                dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
                /*
                 * We depend on the IOMMU being able to work with any physical
                 * address directly, so if the DMA layer suggests otherwise by
                 * translating or truncating them, that bodes very badly...
                 */
                if (dma != virt_to_phys(pages))
                        goto out_unmap;
        }

        return pages;

out_unmap:
        dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
        dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
out_free:
        __free_pages(p, order);
        return NULL;
}

static void __arm_lpae_free_pages(void *pages, size_t size,
                                  struct io_pgtable_cfg *cfg)
{
        if (!cfg->coherent_walk)
                dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
                                 size, DMA_TO_DEVICE);
        free_pages((unsigned long)pages, get_order(size));
}

static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
                                struct io_pgtable_cfg *cfg)
{
        dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
                                   sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
}

static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
{
        *ptep = 0;

        if (!cfg->coherent_walk)
                __arm_lpae_sync_pte(ptep, 1, cfg);
}
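
/*
 * Every PTE update in this file follows the pattern above: write the
 * entry in memory first, then DMA-sync it when the walker is not
 * cache-coherent, since a non-coherent IOMMU would otherwise fetch stale
 * page-table data from DRAM.
 */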

static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                               struct iommu_iotlb_gather *gather,
                               unsigned long iova, size_t size, size_t pgcount,
                               int lvl, arm_lpae_iopte *ptep);

static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                                phys_addr_t paddr, arm_lpae_iopte prot,
                                int lvl, int num_entries, arm_lpae_iopte *ptep)
{
        arm_lpae_iopte pte = prot;
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
        int i;

        if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
                pte |= ARM_LPAE_PTE_TYPE_PAGE;
        else
                pte |= ARM_LPAE_PTE_TYPE_BLOCK;

        for (i = 0; i < num_entries; i++)
                ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);

        if (!cfg->coherent_walk)
                __arm_lpae_sync_pte(ptep, num_entries, cfg);
}

static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                             unsigned long iova, phys_addr_t paddr,
                             arm_lpae_iopte prot, int lvl, int num_entries,
                             arm_lpae_iopte *ptep)
{
        int i;

        for (i = 0; i < num_entries; i++)
                if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
                        /* We require an unmap first */
                        WARN_ON(!selftest_running);
                        return -EEXIST;
                } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
                        /*
                         * We need to unmap and free the old table before
                         * overwriting it with a block entry.
                         */
                        arm_lpae_iopte *tblp;
                        size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);

                        tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
                        if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
                                             lvl, tblp) != sz) {
                                WARN_ON(1);
                                return -EINVAL;
                        }
                }

        __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
        return 0;
}

static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
                                             arm_lpae_iopte *ptep,
                                             arm_lpae_iopte curr,
                                             struct io_pgtable_cfg *cfg)
{
        arm_lpae_iopte old, new;

        new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
                new |= ARM_LPAE_PTE_NSTABLE;

        /*
         * Ensure the table itself is visible before its PTE can be.
         * Whilst we could get away with cmpxchg64_release below, this
         * doesn't have any ordering semantics when !CONFIG_SMP.
         */
        dma_wmb();

        old = cmpxchg64_relaxed(ptep, curr, new);

        if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
                return old;

        /* Even if it's not ours, there's no point waiting; just kick it */
        __arm_lpae_sync_pte(ptep, 1, cfg);
        if (old == curr)
                WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);

        return old;
}
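
/*
 * The SW_SYNC protocol above handles racing table installs on a
 * non-coherent walk: the winner of the cmpxchg cleans its new table PTE
 * to memory and then sets ARM_LPAE_PTE_SW_SYNC in it, so anyone who later
 * observes the bit can skip a redundant sync; an observer who sees the
 * PTE without the bit (e.g. in __arm_lpae_map() below) simply cleans it
 * again rather than waiting.
 */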

static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                          phys_addr_t paddr, size_t size, size_t pgcount,
                          arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
                          gfp_t gfp, size_t *mapped)
{
        arm_lpae_iopte *cptep, pte;
        size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
        size_t tblsz = ARM_LPAE_GRANULE(data);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        int ret = 0, num_entries, max_entries, map_idx_start;

        /* Find our entry at the current level */
        map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
        ptep += map_idx_start;

        /* If we can install a leaf entry at this level, then do so */
        if (size == block_size) {
                max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
                num_entries = min_t(int, pgcount, max_entries);
                ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
                if (!ret && mapped)
                        *mapped += num_entries * size;

                return ret;
        }

        /* We can't allocate tables at the final level */
        if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
                return -EINVAL;

        /* Grab a pointer to the next level */
        pte = READ_ONCE(*ptep);
        if (!pte) {
                cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg);
                if (!cptep)
                        return -ENOMEM;

                pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
                if (pte)
                        __arm_lpae_free_pages(cptep, tblsz, cfg);
        } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
                __arm_lpae_sync_pte(ptep, 1, cfg);
        }

        if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
                cptep = iopte_deref(pte, data);
        } else if (pte) {
                /* We require an unmap first */
                WARN_ON(!selftest_running);
                return -EEXIST;
        }

        /* Rinse, repeat */
        return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
                              cptep, gfp, mapped);
}
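
/*
 * For example, with a 4K granule a 2M-aligned, 2M-sized mapping walks
 * down until level 2 (where ARM_LPAE_BLOCK_SIZE() == SZ_2M) and installs
 * a single block entry there, allocating any missing next-level tables
 * along the way; a 4K mapping recurses one level further and installs a
 * page entry at level 3.
 */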

static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
                                           int prot)
{
        arm_lpae_iopte pte;

        if (data->iop.fmt == APPLE_DART) {
                pte = 0;
                if (!(prot & IOMMU_WRITE))
                        pte |= APPLE_DART_PTE_PROT_NO_WRITE;
                if (!(prot & IOMMU_READ))
                        pte |= APPLE_DART_PTE_PROT_NO_READ;
                return pte;
        }

        if (data->iop.fmt == ARM_64_LPAE_S1 ||
            data->iop.fmt == ARM_32_LPAE_S1) {
                pte = ARM_LPAE_PTE_nG;
                if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
                        pte |= ARM_LPAE_PTE_AP_RDONLY;
                if (!(prot & IOMMU_PRIV))
                        pte |= ARM_LPAE_PTE_AP_UNPRIV;
        } else {
                pte = ARM_LPAE_PTE_HAP_FAULT;
                if (prot & IOMMU_READ)
                        pte |= ARM_LPAE_PTE_HAP_READ;
                if (prot & IOMMU_WRITE)
                        pte |= ARM_LPAE_PTE_HAP_WRITE;
        }

        /*
         * Note that this logic is structured to accommodate Mali LPAE
         * having stage-1-like attributes but stage-2-like permissions.
         */
        if (data->iop.fmt == ARM_64_LPAE_S2 ||
            data->iop.fmt == ARM_32_LPAE_S2) {
                if (prot & IOMMU_MMIO)
                        pte |= ARM_LPAE_PTE_MEMATTR_DEV;
                else if (prot & IOMMU_CACHE)
                        pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
                else
                        pte |= ARM_LPAE_PTE_MEMATTR_NC;
        } else {
                if (prot & IOMMU_MMIO)
                        pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
                                << ARM_LPAE_PTE_ATTRINDX_SHIFT);
                else if (prot & IOMMU_CACHE)
                        pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
                                << ARM_LPAE_PTE_ATTRINDX_SHIFT);
        }

        /*
         * Also Mali has its own notions of shareability wherein its Inner
         * domain covers the cores within the GPU, and its Outer domain is
         * "outside the GPU" (i.e. either the Inner or System domain in CPU
         * terms, depending on coherency).
         */
        if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
                pte |= ARM_LPAE_PTE_SH_IS;
        else
                pte |= ARM_LPAE_PTE_SH_OS;

        if (prot & IOMMU_NOEXEC)
                pte |= ARM_LPAE_PTE_XN;

        if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
                pte |= ARM_LPAE_PTE_NS;

        if (data->iop.fmt != ARM_MALI_LPAE)
                pte |= ARM_LPAE_PTE_AF;

        return pte;
}
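
/*
 * E.g. IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE at stage 1 yields
 * nG | AP_UNPRIV | ATTRINDX(CACHE) | SH_IS | AF, while the same prot at
 * stage 2 yields HAP_READ | HAP_WRITE | MEMATTR_OIWB | SH_IS | AF.
 */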

static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
                              phys_addr_t paddr, size_t pgsize, size_t pgcount,
                              int iommu_prot, gfp_t gfp, size_t *mapped)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte *ptep = data->pgd;
        int ret, lvl = data->start_level;
        arm_lpae_iopte prot;
        long iaext = (s64)iova >> cfg->ias;

        if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
                return -EINVAL;

        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
                iaext = ~iaext;
        if (WARN_ON(iaext || paddr >> cfg->oas))
                return -ERANGE;

        /* If no access, then nothing to do */
        if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
                return 0;

        prot = arm_lpae_prot_to_pte(data, iommu_prot);
        ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
                             ptep, gfp, mapped);
        /*
         * Synchronise all PTE updates for the new mapping before there's
         * a chance for anything to kick off a table walk for the new iova.
         */
        wmb();

        return ret;
}
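
/*
 * A minimal usage sketch of the ops above, mirroring how the IOMMU core
 * advances on partial progress: map_pages() may legitimately stop at a
 * table boundary with ret == 0, reporting how far it got via 'mapped'.
 * (Illustrative only, hence not compiled; assumes 4K pages.)
 */
#if 0
static int example_map_range(struct io_pgtable_ops *ops, unsigned long iova,
                             phys_addr_t paddr, size_t pgcount)
{
        size_t mapped;
        int ret;

        while (pgcount) {
                mapped = 0;
                ret = ops->map_pages(ops, iova, paddr, SZ_4K, pgcount,
                                     IOMMU_READ | IOMMU_WRITE, GFP_KERNEL,
                                     &mapped);
                if (ret)
                        return ret;

                /* Forward progress is guaranteed while pgcount != 0 */
                iova += mapped;
                paddr += mapped;
                pgcount -= mapped / SZ_4K;
        }

        return 0;
}
#endif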

static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
                        phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
{
        return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
                                  NULL);
}

static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
                                    arm_lpae_iopte *ptep)
{
        arm_lpae_iopte *start, *end;
        unsigned long table_size;

        if (lvl == data->start_level)
                table_size = ARM_LPAE_PGD_SIZE(data);
        else
                table_size = ARM_LPAE_GRANULE(data);

        start = ptep;

        /* Only leaf entries at the last level */
        if (lvl == ARM_LPAE_MAX_LEVELS - 1)
                end = ptep;
        else
                end = (void *)ptep + table_size;

        while (ptep != end) {
                arm_lpae_iopte pte = *ptep++;

                if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
                        continue;

                __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
        }

        __arm_lpae_free_pages(start, table_size, &data->iop.cfg);
}

static void arm_lpae_free_pgtable(struct io_pgtable *iop)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);

        __arm_lpae_free_pgtable(data, data->start_level, data->pgd);
        kfree(data);
}

static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                                       struct iommu_iotlb_gather *gather,
                                       unsigned long iova, size_t size,
                                       arm_lpae_iopte blk_pte, int lvl,
                                       arm_lpae_iopte *ptep, size_t pgcount)
{
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte pte, *tablep;
        phys_addr_t blk_paddr;
        size_t tablesz = ARM_LPAE_GRANULE(data);
        size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
        int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
        int i, unmap_idx_start = -1, num_entries = 0, max_entries;

        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;

        tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg);
        if (!tablep)
                return 0; /* Bytes unmapped */

        if (size == split_sz) {
                unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
                max_entries = ptes_per_table - unmap_idx_start;
                num_entries = min_t(int, pgcount, max_entries);
        }

        blk_paddr = iopte_to_paddr(blk_pte, data);
        pte = iopte_prot(blk_pte);

        for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
                /* Unmap! */
                if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
                        continue;

                __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
        }

        pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
        if (pte != blk_pte) {
                __arm_lpae_free_pages(tablep, tablesz, cfg);
                /*
                 * We may race against someone unmapping another part of this
                 * block, but anything else is invalid. We can't misinterpret
                 * a page entry here since we're never at the last level.
                 */
                if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
                        return 0;

                tablep = iopte_deref(pte, data);
        } else if (unmap_idx_start >= 0) {
                for (i = 0; i < num_entries; i++)
                        io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);

                return num_entries * size;
        }

        return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
}
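
/*
 * Concretely: unmapping one 4K page from an existing 2M block (4K
 * granule) lands here with lvl == 3; a level-3 table is allocated, 511
 * of its 512 slots are filled with page entries inheriting the block's
 * attributes, and arm_lpae_install_table() swaps it in over the old
 * block entry, leaving just the requested 4K hole.
 */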

static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                               struct iommu_iotlb_gather *gather,
                               unsigned long iova, size_t size, size_t pgcount,
                               int lvl, arm_lpae_iopte *ptep)
{
        arm_lpae_iopte pte;
        struct io_pgtable *iop = &data->iop;
        int i = 0, num_entries, max_entries, unmap_idx_start;

        /* Something went horribly wrong and we ran out of page table */
        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;

        unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
        ptep += unmap_idx_start;
        pte = READ_ONCE(*ptep);
        if (WARN_ON(!pte))
                return 0;

        /* If the size matches this level, we're in the right place */
        if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
                max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
                num_entries = min_t(int, pgcount, max_entries);

                while (i < num_entries) {
                        pte = READ_ONCE(*ptep);
                        if (WARN_ON(!pte))
                                break;

                        __arm_lpae_clear_pte(ptep, &iop->cfg);

                        if (!iopte_leaf(pte, lvl, iop->fmt)) {
                                /* Also flush any partial walks */
                                io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
                                                          ARM_LPAE_GRANULE(data));
                                __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
                        } else if (!iommu_iotlb_gather_queued(gather)) {
                                io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
                        }

                        ptep++;
                        i++;
                }

                return i * size;
        } else if (iopte_leaf(pte, lvl, iop->fmt)) {
                /*
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
                return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
                                                lvl + 1, ptep, pgcount);
        }

        /* Keep on walkin' */
        ptep = iopte_deref(pte, data);
        return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
}

static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
                                   size_t pgsize, size_t pgcount,
                                   struct iommu_iotlb_gather *gather)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte *ptep = data->pgd;
        long iaext = (s64)iova >> cfg->ias;

        if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
                return 0;

        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
                iaext = ~iaext;
        if (WARN_ON(iaext))
                return 0;

        return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
                                data->start_level, ptep);
}

static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
                             size_t size, struct iommu_iotlb_gather *gather)
{
        return arm_lpae_unmap_pages(ops, iova, size, 1, gather);
}

static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
                                         unsigned long iova)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        arm_lpae_iopte pte, *ptep = data->pgd;
        int lvl = data->start_level;

        do {
                /* Valid IOPTE pointer? */
                if (!ptep)
                        return 0;

                /* Grab the IOPTE we're interested in */
                ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
                pte = READ_ONCE(*ptep);

                /* Valid entry? */
                if (!pte)
                        return 0;

                /* Leaf entry? */
                if (iopte_leaf(pte, lvl, data->iop.fmt))
                        goto found_translation;

                /* Take it to the next level */
                ptep = iopte_deref(pte, data);
        } while (++lvl < ARM_LPAE_MAX_LEVELS);

        /* Ran out of page tables to walk */
        return 0;

found_translation:
        iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
        return iopte_to_paddr(pte, data) | iova;
}
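
/*
 * E.g. an IOVA covered by a 2M block at level 2 whose output address is
 * 0x80000000: iova_to_phys(0x40001234) stops at the level-2 leaf, keeps
 * the 0x1234 offset below the 2M block size and returns 0x80001234.
 */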

static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
{
        unsigned long granule, page_sizes;
        unsigned int max_addr_bits = 48;

        /*
         * We need to restrict the supported page sizes to match the
         * translation regime for a particular granule. Aim to match
         * the CPU page size if possible, otherwise prefer smaller sizes.
         * While we're at it, restrict the block sizes to match the
         * chosen granule.
         */
        if (cfg->pgsize_bitmap & PAGE_SIZE)
                granule = PAGE_SIZE;
        else if (cfg->pgsize_bitmap & ~PAGE_MASK)
                granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK);
        else if (cfg->pgsize_bitmap & PAGE_MASK)
                granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK);
        else
                granule = 0;

        switch (granule) {
        case SZ_4K:
                page_sizes = (SZ_4K | SZ_2M | SZ_1G);
                break;
        case SZ_16K:
                page_sizes = (SZ_16K | SZ_32M);
                break;
        case SZ_64K:
                max_addr_bits = 52;
                page_sizes = (SZ_64K | SZ_512M);
                if (cfg->oas > 48)
                        page_sizes |= 1ULL << 42; /* 4TB */
                break;
        default:
                page_sizes = 0;
        }

        cfg->pgsize_bitmap &= page_sizes;
        cfg->ias = min(cfg->ias, max_addr_bits);
        cfg->oas = min(cfg->oas, max_addr_bits);
}
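
/*
 * For instance, with PAGE_SIZE == SZ_4K and a caller-supplied
 * pgsize_bitmap of SZ_4K | SZ_64K | SZ_2M, the 4K granule is chosen to
 * match the CPU and the bitmap is trimmed to SZ_4K | SZ_2M: SZ_64K is
 * neither the page nor a block size of the 4K translation regime.
 */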

static struct arm_lpae_io_pgtable *
arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
{
        struct arm_lpae_io_pgtable *data;
        int levels, va_bits, pg_shift;

        arm_lpae_restrict_pgsizes(cfg);

        if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K)))
                return NULL;

        if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS)
                return NULL;

        if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
                return NULL;

        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return NULL;

        pg_shift = __ffs(cfg->pgsize_bitmap);
        data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));

        va_bits = cfg->ias - pg_shift;
        levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
        data->start_level = ARM_LPAE_MAX_LEVELS - levels;

        /* Calculate the actual size of our pgd (without concatenation) */
        data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));

        data->iop.ops = (struct io_pgtable_ops) {
                .map            = arm_lpae_map,
                .map_pages      = arm_lpae_map_pages,
                .unmap          = arm_lpae_unmap,
                .unmap_pages    = arm_lpae_unmap_pages,
                .iova_to_phys   = arm_lpae_iova_to_phys,
        };

        return data;
}
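
/*
 * As a worked example: ias = 48 with a 4K granule gives pg_shift = 12,
 * bits_per_level = 9, va_bits = 36, levels = 4, start_level = 0 and
 * pgd_bits = 9 (a 4K pgd, 9 index bits per level); ias = 32 with a 64K
 * granule gives levels = 2 and start_level = 2 instead.
 */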

static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
        u64 reg;
        struct arm_lpae_io_pgtable *data;
        typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
        bool tg1;

        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_ARM_TTBR1 |
                            IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
                return NULL;

        data = arm_lpae_alloc_pgtable(cfg);
        if (!data)
                return NULL;

        /* TCR */
        if (cfg->coherent_walk) {
                tcr->sh = ARM_LPAE_TCR_SH_IS;
                tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
                tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
                if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
                        goto out_free_data;
        } else {
                tcr->sh = ARM_LPAE_TCR_SH_OS;
                tcr->irgn = ARM_LPAE_TCR_RGN_NC;
                if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
                        tcr->orgn = ARM_LPAE_TCR_RGN_NC;
                else
                        tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
        }

        tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
        switch (ARM_LPAE_GRANULE(data)) {
        case SZ_4K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
                break;
        case SZ_16K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
                break;
        case SZ_64K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
                break;
        }

        switch (cfg->oas) {
        case 32:
                tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
                break;
        case 36:
                tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
                break;
        case 40:
                tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
                break;
        case 42:
                tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
                break;
        case 44:
                tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
                break;
        case 48:
                tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
                break;
        case 52:
                tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
                break;
        default:
                goto out_free_data;
        }

        tcr->tsz = 64ULL - cfg->ias;

        /* MAIRs */
        reg = (ARM_LPAE_MAIR_ATTR_NC
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
              (ARM_LPAE_MAIR_ATTR_WBRWA
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
              (ARM_LPAE_MAIR_ATTR_DEVICE
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
              (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));

        cfg->arm_lpae_s1_cfg.mair = reg;

        /* Looking good; allocate a pgd */
        data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
                                           GFP_KERNEL, cfg);
        if (!data->pgd)
                goto out_free_data;

        /* Ensure the empty pgd is visible before any actual TTBR write */
        wmb();

        /* TTBR */
        cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
        return &data->iop;

out_free_data:
        kfree(data);
        return NULL;
}
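
/*
 * For a typical coherent, non-TTBR1 configuration with a 4K granule,
 * ias = 48 and oas = 44, the TCR built above ends up with tsz = 16,
 * tg = TG0_4K, sh = SH_IS, irgn = orgn = RGN_WBWA and ips = PS_44_BIT.
 */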

static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
        u64 sl;
        struct arm_lpae_io_pgtable *data;
        typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;

        /* The NS quirk doesn't apply at stage 2 */
        if (cfg->quirks)
                return NULL;

        data = arm_lpae_alloc_pgtable(cfg);
        if (!data)
                return NULL;

        /*
         * Concatenate PGDs at level 1 if possible in order to reduce
         * the depth of the stage-2 walk.
         */
        if (data->start_level == 0) {
                unsigned long pgd_pages;

                pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
                if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
                        data->pgd_bits += data->bits_per_level;
                        data->start_level++;
                }
        }

        /* VTCR */
        if (cfg->coherent_walk) {
                vtcr->sh = ARM_LPAE_TCR_SH_IS;
                vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
                vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
        } else {
                vtcr->sh = ARM_LPAE_TCR_SH_OS;
                vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
                vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
        }

        sl = data->start_level;

        switch (ARM_LPAE_GRANULE(data)) {
        case SZ_4K:
                vtcr->tg = ARM_LPAE_TCR_TG0_4K;
                sl++; /* SL0 format is different for 4K granule size */
                break;
        case SZ_16K:
                vtcr->tg = ARM_LPAE_TCR_TG0_16K;
                break;
        case SZ_64K:
                vtcr->tg = ARM_LPAE_TCR_TG0_64K;
                break;
        }

        switch (cfg->oas) {
        case 32:
                vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
                break;
        case 36:
                vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
                break;
        case 40:
                vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
                break;
        case 42:
                vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
                break;
        case 44:
                vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
                break;
        case 48:
                vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
                break;
        case 52:
                vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
                break;
        default:
                goto out_free_data;
        }

        vtcr->tsz = 64ULL - cfg->ias;
        vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;

        /* Allocate pgd pages */
        data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
                                           GFP_KERNEL, cfg);
        if (!data->pgd)
                goto out_free_data;

        /* Ensure the empty pgd is visible before any actual TTBR write */
        wmb();

        /* VTTBR */
        cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
        return &data->iop;

out_free_data:
        kfree(data);
        return NULL;
}
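
/*
 * Concatenation example: ias = 40 with a 4K granule initially computes
 * va_bits = 28, start_level = 0 and pgd_bits = 1, i.e. a 2-entry level-0
 * table. Since 2 <= ARM_LPAE_S2_MAX_CONCAT_PAGES, the code above instead
 * concatenates two level-1 tables (pgd_bits = 10, start_level = 1) and
 * encodes SL0 for a walk starting at level 1.
 */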

static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
        if (cfg->ias > 32 || cfg->oas > 40)
                return NULL;

        cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
        return arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
}

static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
        if (cfg->ias > 40 || cfg->oas > 40)
                return NULL;

        cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
        return arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
}

static struct io_pgtable *
arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
        struct arm_lpae_io_pgtable *data;

        /* No quirks for Mali (hopefully) */
        if (cfg->quirks)
                return NULL;

        if (cfg->ias > 48 || cfg->oas > 40)
                return NULL;

        cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);

        data = arm_lpae_alloc_pgtable(cfg);
        if (!data)
                return NULL;

        /* Mali seems to need a full 4-level table regardless of IAS */
        if (data->start_level > 0) {
                data->start_level = 0;
                data->pgd_bits = 0;
        }
        /*
         * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
         * best we can do is mimic the out-of-tree driver and hope that the
         * "implementation-defined caching policy" is good enough. Similarly,
         * we'll use it for the sake of a valid attribute for our 'device'
         * index, although callers should never request that in practice.
         */
        cfg->arm_mali_lpae_cfg.memattr =
                (ARM_MALI_LPAE_MEMATTR_IMP_DEF
                 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
                (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
                 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
                (ARM_MALI_LPAE_MEMATTR_IMP_DEF
                 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));

        data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
                                           cfg);
        if (!data->pgd)
                goto out_free_data;

        /* Ensure the empty pgd is visible before TRANSTAB can be written */
        wmb();

        cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
                                          ARM_MALI_LPAE_TTBR_READ_INNER |
                                          ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
        if (cfg->coherent_walk)
                cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER;

        return &data->iop;

out_free_data:
        kfree(data);
        return NULL;
}

static struct io_pgtable *
apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
        struct arm_lpae_io_pgtable *data;
        int i;

        if (cfg->oas > 36)
                return NULL;

        data = arm_lpae_alloc_pgtable(cfg);
        if (!data)
                return NULL;

        /*
         * The table format itself always uses two levels, but the total VA
         * space is mapped by four separate tables, making the MMIO registers
         * an effective "level 1". For simplicity, though, we treat this
         * equivalently to LPAE stage 2 concatenation at level 2, with the
         * additional TTBRs each just pointing at consecutive pages.
         */
        if (data->start_level < 1)
                goto out_free_data;
        if (data->start_level == 1 && data->pgd_bits > 2)
                goto out_free_data;
        if (data->start_level > 1)
                data->pgd_bits = 0;
        data->start_level = 2;
        cfg->apple_dart_cfg.n_ttbrs = 1 << data->pgd_bits;
        data->pgd_bits += data->bits_per_level;

        data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
                                           cfg);
        if (!data->pgd)
                goto out_free_data;

        for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i)
                cfg->apple_dart_cfg.ttbr[i] =
                        virt_to_phys(data->pgd + i * ARM_LPAE_GRANULE(data));

        return &data->iop;

out_free_data:
        kfree(data);
        return NULL;
}
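
/*
 * E.g. with a 4K granule and ias = 32, arm_lpae_alloc_pgtable() returns
 * start_level = 1 and pgd_bits = 2, so the fixup above yields
 * n_ttbrs = 4 and a "pgd" of four consecutive 4K level-2 tables, one per
 * TTBR register.
 */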

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
        .alloc  = arm_64_lpae_alloc_pgtable_s1,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
        .alloc  = arm_64_lpae_alloc_pgtable_s2,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
        .alloc  = arm_32_lpae_alloc_pgtable_s1,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
        .alloc  = arm_32_lpae_alloc_pgtable_s2,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
        .alloc  = arm_mali_lpae_alloc_pgtable,
        .free   = arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = {
        .alloc  = apple_dart_alloc_pgtable,
        .free   = arm_lpae_free_pgtable,
};

#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST

static struct io_pgtable_cfg *cfg_cookie __initdata;

static void __init dummy_tlb_flush_all(void *cookie)
{
        WARN_ON(cookie != cfg_cookie);
}

static void __init dummy_tlb_flush(unsigned long iova, size_t size,
                                   size_t granule, void *cookie)
{
        WARN_ON(cookie != cfg_cookie);
        WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}

static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
                                      unsigned long iova, size_t granule,
                                      void *cookie)
{
        dummy_tlb_flush(iova, granule, granule, cookie);
}

static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
        .tlb_flush_all  = dummy_tlb_flush_all,
        .tlb_flush_walk = dummy_tlb_flush,
        .tlb_add_page   = dummy_tlb_add_page,
};

static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;

        pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
                cfg->pgsize_bitmap, cfg->ias);
        pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
                ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
                ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
}

#define __FAIL(ops, i)  ({                                              \
                WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \
                arm_lpae_dump_ops(ops);                                 \
                selftest_running = false;                               \
                -EFAULT;                                                \
})

static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
{
        static const enum io_pgtable_fmt fmts[] __initconst = {
                ARM_64_LPAE_S1,
                ARM_64_LPAE_S2,
        };

        int i, j;
        unsigned long iova;
        size_t size;
        struct io_pgtable_ops *ops;

        selftest_running = true;

        for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
                cfg_cookie = cfg;
                ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
                if (!ops) {
                        pr_err("selftest: failed to allocate io pgtable ops\n");
                        return -ENOMEM;
                }

                /*
                 * Initial sanity checks.
                 * Empty page tables shouldn't provide any translations.
                 */
                if (ops->iova_to_phys(ops, 42))
                        return __FAIL(ops, i);

                if (ops->iova_to_phys(ops, SZ_1G + 42))
                        return __FAIL(ops, i);

                if (ops->iova_to_phys(ops, SZ_2G + 42))
                        return __FAIL(ops, i);

                /*
                 * Distinct mappings of different granule sizes.
                 */
                iova = 0;
                for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
                        size = 1UL << j;

                        if (ops->map(ops, iova, iova, size, IOMMU_READ |
                                                            IOMMU_WRITE |
                                                            IOMMU_NOEXEC |
                                                            IOMMU_CACHE, GFP_KERNEL))
                                return __FAIL(ops, i);

                        /* Overlapping mappings */
                        if (!ops->map(ops, iova, iova + size, size,
                                      IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL))
                                return __FAIL(ops, i);

                        if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
                                return __FAIL(ops, i);

                        iova += SZ_1G;
                }

                /* Partial unmap */
                size = 1UL << __ffs(cfg->pgsize_bitmap);
                if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
                        return __FAIL(ops, i);

                /* Remap of partial unmap */
                if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ, GFP_KERNEL))
                        return __FAIL(ops, i);

                if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
                        return __FAIL(ops, i);

                /* Full unmap */
                iova = 0;
                for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
                        size = 1UL << j;

                        if (ops->unmap(ops, iova, size, NULL) != size)
                                return __FAIL(ops, i);

                        if (ops->iova_to_phys(ops, iova + 42))
                                return __FAIL(ops, i);

                        /* Remap full block */
                        if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL))
                                return __FAIL(ops, i);

                        if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
                                return __FAIL(ops, i);

                        iova += SZ_1G;
                }

                free_io_pgtable_ops(ops);
        }

        selftest_running = false;
        return 0;
}

static int __init arm_lpae_do_selftests(void)
{
        static const unsigned long pgsize[] __initconst = {
                SZ_4K | SZ_2M | SZ_1G,
                SZ_16K | SZ_32M,
                SZ_64K | SZ_512M,
        };

        static const unsigned int ias[] __initconst = {
                32, 36, 40, 42, 44, 48,
        };

        int i, j, pass = 0, fail = 0;
        struct io_pgtable_cfg cfg = {
                .tlb = &dummy_tlb_ops,
                .oas = 48,
                .coherent_walk = true,
        };

        for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
                for (j = 0; j < ARRAY_SIZE(ias); ++j) {
                        cfg.pgsize_bitmap = pgsize[i];
                        cfg.ias = ias[j];
                        pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
                                pgsize[i], ias[j]);
                        if (arm_lpae_run_tests(&cfg))
                                fail++;
                        else
                                pass++;
                }
        }

        pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
        return fail ? -EFAULT : 0;
}
subsys_initcall(arm_lpae_do_selftests);
#endif