linux/drivers/iommu/io-pgtable-arm-v7s.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic ARM page table allocator.
 *
 * ARMv7 Short-descriptor format, supporting
 * - Basic memory attributes
 * - Simplified access permissions (AP[2:1] model)
 * - Backwards-compatible TEX remap
 * - Large pages/supersections (if indicated by the caller)
 *
 * Not supporting:
 * - Legacy access permissions (AP[2:0] model)
 *
 * Almost certainly never supporting:
 * - PXN
 * - Domains
 *
 * Copyright (C) 2014-2015 ARM Limited
 * Copyright (c) 2014-2015 MediaTek Inc.
 */

#define pr_fmt(fmt)     "arm-v7s io-pgtable: " fmt

#include <linux/atomic.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

#include <asm/barrier.h>

/* Struct accessors */
#define io_pgtable_to_data(x)                                           \
        container_of((x), struct arm_v7s_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)                                       \
        io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
 * and 12 bits in a page. With some carefully-chosen coefficients we can
 * hide the ugly inconsistencies behind these macros and at least let the
 * rest of the code pretend to be somewhat sane.
 */
#define ARM_V7S_ADDR_BITS               32
#define _ARM_V7S_LVL_BITS(lvl)          (16 - (lvl) * 4)
#define ARM_V7S_LVL_SHIFT(lvl)          (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl)))
#define ARM_V7S_TABLE_SHIFT             10

#define ARM_V7S_PTES_PER_LVL(lvl)       (1 << _ARM_V7S_LVL_BITS(lvl))
#define ARM_V7S_TABLE_SIZE(lvl)                                         \
        (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte))

#define ARM_V7S_BLOCK_SIZE(lvl)         (1UL << ARM_V7S_LVL_SHIFT(lvl))
#define ARM_V7S_LVL_MASK(lvl)           ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl)))
#define ARM_V7S_TABLE_MASK              ((u32)(~0U << ARM_V7S_TABLE_SHIFT))
#define _ARM_V7S_IDX_MASK(lvl)          (ARM_V7S_PTES_PER_LVL(lvl) - 1)
#define ARM_V7S_LVL_IDX(addr, lvl)      ({                              \
        int _l = lvl;                                                   \
        ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \
})
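
/*
 * A worked example (illustrative only): for IOVA 0x12345678,
 * ARM_V7S_LVL_SHIFT(1) = 20 and _ARM_V7S_LVL_BITS(1) = 12, so bits
 * [31:20] = 0x123 index the 4096-entry (16KB) level 1 table;
 * ARM_V7S_LVL_SHIFT(2) = 12 and _ARM_V7S_LVL_BITS(2) = 8, so bits
 * [19:12] = 0x45 index a 256-entry (1KB) level 2 table, leaving bits
 * [11:0] = 0x678 as the offset within the 4KB page.
 */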

/*
 * Large page/supersection entries are effectively a block of 16 page/section
 * entries, along the lines of the LPAE contiguous hint, but all with the
 * same output address. For want of a better common name we'll call them
 * "contiguous" versions of their respective page/section entries here, but
 * noting the distinction (WRT TLB maintenance) that they represent *one*
 * entry repeated 16 times, not 16 separate entries (as in the LPAE case).
 */
#define ARM_V7S_CONT_PAGES              16
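
/*
 * Concretely, a 64KB large page is written as 16 identical consecutive
 * level 2 entries, and a 16MB supersection as 16 identical consecutive
 * level 1 entries, all encoding the same output address.
 */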

/* PTE type bits: these are all mixed up with XN/PXN bits in most cases */
#define ARM_V7S_PTE_TYPE_TABLE          0x1
#define ARM_V7S_PTE_TYPE_PAGE           0x2
#define ARM_V7S_PTE_TYPE_CONT_PAGE      0x1

#define ARM_V7S_PTE_IS_VALID(pte)       (((pte) & 0x3) != 0)
#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \
        ((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE))

/* Page table bits */
#define ARM_V7S_ATTR_XN(lvl)            BIT(4 * (2 - (lvl)))
#define ARM_V7S_ATTR_B                  BIT(2)
#define ARM_V7S_ATTR_C                  BIT(3)
#define ARM_V7S_ATTR_NS_TABLE           BIT(3)
#define ARM_V7S_ATTR_NS_SECTION         BIT(19)

#define ARM_V7S_CONT_SECTION            BIT(18)
#define ARM_V7S_CONT_PAGE_XN_SHIFT      15

/*
 * The attribute bits are consistently ordered*, but occupy bits [17:10] of
 * a level 1 PTE vs. bits [11:4] at level 2. Thus we define the individual
 * fields relative to that 8-bit block, plus a total shift relative to the PTE.
 */
#define ARM_V7S_ATTR_SHIFT(lvl)         (16 - (lvl) * 6)
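
/*
 * The field definitions below are thus relative to that 8-bit attribute
 * block; e.g. AP0 lands at PTE bit 10 for a level 1 (section) entry and at
 * PTE bit 4 for a level 2 (small page) entry.
 */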

#define ARM_V7S_ATTR_MASK               0xff
#define ARM_V7S_ATTR_AP0                BIT(0)
#define ARM_V7S_ATTR_AP1                BIT(1)
#define ARM_V7S_ATTR_AP2                BIT(5)
#define ARM_V7S_ATTR_S                  BIT(6)
#define ARM_V7S_ATTR_NG                 BIT(7)
#define ARM_V7S_TEX_SHIFT               2
#define ARM_V7S_TEX_MASK                0x7
#define ARM_V7S_ATTR_TEX(val)           (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)

/* MediaTek extends the format with two PTE bits carrying PA bits 32 and 33 */
#define ARM_V7S_ATTR_MTK_PA_BIT32       BIT(9)
#define ARM_V7S_ATTR_MTK_PA_BIT33       BIT(4)

/* *well, except for TEX on level 2 large pages, of course :( */
#define ARM_V7S_CONT_PAGE_TEX_SHIFT     6
#define ARM_V7S_CONT_PAGE_TEX_MASK      (ARM_V7S_TEX_MASK << ARM_V7S_CONT_PAGE_TEX_SHIFT)

/* Simplified access permissions */
#define ARM_V7S_PTE_AF                  ARM_V7S_ATTR_AP0
#define ARM_V7S_PTE_AP_UNPRIV           ARM_V7S_ATTR_AP1
#define ARM_V7S_PTE_AP_RDONLY           ARM_V7S_ATTR_AP2

/* Register bits */
#define ARM_V7S_RGN_NC                  0
#define ARM_V7S_RGN_WBWA                1
#define ARM_V7S_RGN_WT                  2
#define ARM_V7S_RGN_WB                  3

#define ARM_V7S_PRRR_TYPE_DEVICE        1
#define ARM_V7S_PRRR_TYPE_NORMAL        2
#define ARM_V7S_PRRR_TR(n, type)        (((type) & 0x3) << ((n) * 2))
#define ARM_V7S_PRRR_DS0                BIT(16)
#define ARM_V7S_PRRR_DS1                BIT(17)
#define ARM_V7S_PRRR_NS0                BIT(18)
#define ARM_V7S_PRRR_NS1                BIT(19)
#define ARM_V7S_PRRR_NOS(n)             BIT((n) + 24)

#define ARM_V7S_NMRR_IR(n, attr)        (((attr) & 0x3) << ((n) * 2))
#define ARM_V7S_NMRR_OR(n, attr)        (((attr) & 0x3) << ((n) * 2 + 16))

#define ARM_V7S_TTBR_S                  BIT(1)
#define ARM_V7S_TTBR_NOS                BIT(5)
#define ARM_V7S_TTBR_ORGN_ATTR(attr)    (((attr) & 0x3) << 3)
#define ARM_V7S_TTBR_IRGN_ATTR(attr)                                    \
        ((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1))

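/*
 * Since a PTE is only 32 bits, the tables themselves must sit below 4GB
 * physical: hence the DMA32 (or DMA) zone constraint here, backed up by
 * the explicit truncation check in __arm_v7s_alloc_table().
 */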
#ifdef CONFIG_ZONE_DMA32
#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32
#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32
#else
#define ARM_V7S_TABLE_GFP_DMA GFP_DMA
#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA
#endif

typedef u32 arm_v7s_iopte;

static bool selftest_running;

struct arm_v7s_io_pgtable {
        struct io_pgtable       iop;

        arm_v7s_iopte           *pgd;
        struct kmem_cache       *l2_tables;
        spinlock_t              split_lock;
};

static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl);

static dma_addr_t __arm_v7s_dma_addr(void *pages)
{
        return (dma_addr_t)virt_to_phys(pages);
}

static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg)
{
        return IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
                (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT);
}

static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
                                    struct io_pgtable_cfg *cfg)
{
        arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);

        if (!arm_v7s_is_mtk_enabled(cfg))
                return pte;

        if (paddr & BIT_ULL(32))
                pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
        if (paddr & BIT_ULL(33))
                pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
        return pte;
}

static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
                                  struct io_pgtable_cfg *cfg)
{
        arm_v7s_iopte mask;
        phys_addr_t paddr;

        if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
                mask = ARM_V7S_TABLE_MASK;
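        /*
         * Deliberate u32 overflow: e.g. for a supersection,
         * 0xfff00000 * 16 wraps to 0xff000000, the 16MB address mask.
         */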
        else if (arm_v7s_pte_is_cont(pte, lvl))
                mask = ARM_V7S_LVL_MASK(lvl) * ARM_V7S_CONT_PAGES;
        else
                mask = ARM_V7S_LVL_MASK(lvl);

        paddr = pte & mask;
        if (!arm_v7s_is_mtk_enabled(cfg))
                return paddr;

        if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
                paddr |= BIT_ULL(32);
        if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
                paddr |= BIT_ULL(33);
        return paddr;
}

static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
                                  struct arm_v7s_io_pgtable *data)
{
        return phys_to_virt(iopte_to_paddr(pte, lvl, &data->iop.cfg));
}

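/*
 * Level 1 tables are 16KB and come straight from the page allocator; level 2
 * tables are only 1KB, so they are carved out of a naturally-aligned
 * kmem_cache. For non-coherent walks the table is also DMA-mapped, and we
 * rely on that mapping being a direct one (dma == phys).
 */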
static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
                                   struct arm_v7s_io_pgtable *data)
{
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        struct device *dev = cfg->iommu_dev;
        phys_addr_t phys;
        dma_addr_t dma;
        size_t size = ARM_V7S_TABLE_SIZE(lvl);
        void *table = NULL;

        if (lvl == 1)
                table = (void *)__get_free_pages(
                        __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
        else if (lvl == 2)
                table = kmem_cache_zalloc(data->l2_tables, gfp);
        phys = virt_to_phys(table);
        if (phys != (arm_v7s_iopte)phys) {
                /* Doesn't fit in PTE */
                dev_err(dev, "Page table does not fit in PTE: %pa\n", &phys);
                goto out_free;
        }
        if (table && !cfg->coherent_walk) {
                dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
                /*
                 * We depend on the IOMMU being able to work with any physical
                 * address directly, so if the DMA layer suggests otherwise by
                 * translating or truncating them, that bodes very badly...
                 */
                if (dma != phys)
                        goto out_unmap;
        }
        if (lvl == 2)
                kmemleak_ignore(table);
        return table;

out_unmap:
        dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
        dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
out_free:
        if (lvl == 1)
                free_pages((unsigned long)table, get_order(size));
        else
                kmem_cache_free(data->l2_tables, table);
        return NULL;
}

static void __arm_v7s_free_table(void *table, int lvl,
                                 struct arm_v7s_io_pgtable *data)
{
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        struct device *dev = cfg->iommu_dev;
        size_t size = ARM_V7S_TABLE_SIZE(lvl);

        if (!cfg->coherent_walk)
                dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
                                 DMA_TO_DEVICE);
        if (lvl == 1)
                free_pages((unsigned long)table, get_order(size));
        else
                kmem_cache_free(data->l2_tables, table);
}

static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
                               struct io_pgtable_cfg *cfg)
{
        if (cfg->coherent_walk)
                return;

        dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
                                   num_entries * sizeof(*ptep), DMA_TO_DEVICE);
}

static void __arm_v7s_set_pte(arm_v7s_iopte *ptep, arm_v7s_iopte pte,
                              int num_entries, struct io_pgtable_cfg *cfg)
{
        int i;

        for (i = 0; i < num_entries; i++)
                ptep[i] = pte;

        __arm_v7s_pte_sync(ptep, num_entries, cfg);
}

static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
                                         struct io_pgtable_cfg *cfg)
{
        bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS);
        arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S;

        if (!(prot & IOMMU_MMIO))
                pte |= ARM_V7S_ATTR_TEX(1);
        if (ap) {
                pte |= ARM_V7S_PTE_AF;
                if (!(prot & IOMMU_PRIV))
                        pte |= ARM_V7S_PTE_AP_UNPRIV;
                if (!(prot & IOMMU_WRITE))
                        pte |= ARM_V7S_PTE_AP_RDONLY;
        }
        pte <<= ARM_V7S_ATTR_SHIFT(lvl);

        if ((prot & IOMMU_NOEXEC) && ap)
                pte |= ARM_V7S_ATTR_XN(lvl);
        if (prot & IOMMU_MMIO)
                pte |= ARM_V7S_ATTR_B;
        else if (prot & IOMMU_CACHE)
                pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;

        pte |= ARM_V7S_PTE_TYPE_PAGE;
        if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
                pte |= ARM_V7S_ATTR_NS_SECTION;

        return pte;
}
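
/*
 * For example (no quirks assumed): IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE
 * at level 2 builds nG|S|TEX(1)|AF|AP1 = 0xc7, shifted up to 0xc70, then
 * gains C|B (0xc) and the small page type (0x2), giving attribute bits
 * 0xc7e to be OR'd with the output address.
 */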

static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
{
        int prot = IOMMU_READ;
        arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);

        if (!(attr & ARM_V7S_PTE_AP_RDONLY))
                prot |= IOMMU_WRITE;
        if (!(attr & ARM_V7S_PTE_AP_UNPRIV))
                prot |= IOMMU_PRIV;
        if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
                prot |= IOMMU_MMIO;
        else if (pte & ARM_V7S_ATTR_C)
                prot |= IOMMU_CACHE;
        if (pte & ARM_V7S_ATTR_XN(lvl))
                prot |= IOMMU_NOEXEC;

        return prot;
}

static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
{
        if (lvl == 1) {
                pte |= ARM_V7S_CONT_SECTION;
        } else if (lvl == 2) {
                arm_v7s_iopte xn = pte & ARM_V7S_ATTR_XN(lvl);
                arm_v7s_iopte tex = pte & ARM_V7S_CONT_PAGE_TEX_MASK;

                pte ^= xn | tex | ARM_V7S_PTE_TYPE_PAGE;
                pte |= (xn << ARM_V7S_CONT_PAGE_XN_SHIFT) |
                       (tex << ARM_V7S_CONT_PAGE_TEX_SHIFT) |
                       ARM_V7S_PTE_TYPE_CONT_PAGE;
        }
        return pte;
}

static arm_v7s_iopte arm_v7s_cont_to_pte(arm_v7s_iopte pte, int lvl)
{
        if (lvl == 1) {
                pte &= ~ARM_V7S_CONT_SECTION;
        } else if (lvl == 2) {
                arm_v7s_iopte xn = pte & BIT(ARM_V7S_CONT_PAGE_XN_SHIFT);
                arm_v7s_iopte tex = pte & (ARM_V7S_CONT_PAGE_TEX_MASK <<
                                           ARM_V7S_CONT_PAGE_TEX_SHIFT);

                pte ^= xn | tex | ARM_V7S_PTE_TYPE_CONT_PAGE;
                pte |= (xn >> ARM_V7S_CONT_PAGE_XN_SHIFT) |
                       (tex >> ARM_V7S_CONT_PAGE_TEX_SHIFT) |
                       ARM_V7S_PTE_TYPE_PAGE;
        }
        return pte;
}
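
/*
 * Net effect at level 2: XN moves between bit 0 and bit 15, TEX between
 * bits [8:6] and [14:12], and the type field flips between 0b10 (small
 * page) and 0b01 (large page); at level 1 only bit 18 is toggled.
 */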

static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
{
        if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte, lvl))
                return pte & ARM_V7S_CONT_SECTION;
        else if (lvl == 2)
                return !(pte & ARM_V7S_PTE_TYPE_PAGE);
        return false;
}

static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *,
                              struct iommu_iotlb_gather *, unsigned long,
                              size_t, int, arm_v7s_iopte *);

static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
                            unsigned long iova, phys_addr_t paddr, int prot,
                            int lvl, int num_entries, arm_v7s_iopte *ptep)
{
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_v7s_iopte pte;
        int i;

        for (i = 0; i < num_entries; i++)
                if (ARM_V7S_PTE_IS_TABLE(ptep[i], lvl)) {
                        /*
                         * We need to unmap and free the old table before
                         * overwriting it with a block entry.
                         */
                        arm_v7s_iopte *tblp;
                        size_t sz = ARM_V7S_BLOCK_SIZE(lvl);

                        tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
                        if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
                                                    sz, lvl, tblp) != sz))
                                return -EINVAL;
                } else if (ptep[i]) {
                        /* We require an unmap first */
                        WARN_ON(!selftest_running);
                        return -EEXIST;
                }

        pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
        if (num_entries > 1)
                pte = arm_v7s_pte_to_cont(pte, lvl);

        pte |= paddr_to_iopte(paddr, lvl, cfg);

        __arm_v7s_set_pte(ptep, pte, num_entries, cfg);
        return 0;
}

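/*
 * Atomically install a next-level table pointer, returning the entry that
 * was previously in place (i.e. @curr if we won the race). A caller that
 * loses the race must free the table it allocated and use the winner's.
 */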
static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
                                           arm_v7s_iopte *ptep,
                                           arm_v7s_iopte curr,
                                           struct io_pgtable_cfg *cfg)
{
        arm_v7s_iopte old, new;

        new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
                new |= ARM_V7S_ATTR_NS_TABLE;

        /*
         * Ensure the table itself is visible before its PTE can be.
         * Whilst we could get away with cmpxchg_release below, this
         * doesn't have any ordering semantics when !CONFIG_SMP.
         */
        dma_wmb();

        old = cmpxchg_relaxed(ptep, curr, new);
        __arm_v7s_pte_sync(ptep, 1, cfg);

        return old;
}

static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
                         phys_addr_t paddr, size_t size, int prot,
                         int lvl, arm_v7s_iopte *ptep, gfp_t gfp)
{
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_v7s_iopte pte, *cptep;
        int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);

        /* Find our entry at the current level */
        ptep += ARM_V7S_LVL_IDX(iova, lvl);

        /* If we can install a leaf entry at this level, then do so */
        if (num_entries)
                return arm_v7s_init_pte(data, iova, paddr, prot,
                                        lvl, num_entries, ptep);

        /* We can't allocate tables at the final level */
        if (WARN_ON(lvl == 2))
                return -EINVAL;

        /* Grab a pointer to the next level */
        pte = READ_ONCE(*ptep);
        if (!pte) {
                cptep = __arm_v7s_alloc_table(lvl + 1, gfp, data);
                if (!cptep)
                        return -ENOMEM;

                pte = arm_v7s_install_table(cptep, ptep, 0, cfg);
                if (pte)
                        __arm_v7s_free_table(cptep, lvl + 1, data);
        } else {
                /* We've no easy way of knowing if it's synced yet, so... */
                __arm_v7s_pte_sync(ptep, 1, cfg);
        }

        if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
                cptep = iopte_deref(pte, lvl, data);
        } else if (pte) {
                /* We require an unmap first */
                WARN_ON(!selftest_running);
                return -EEXIST;
        }

        /* Rinse, repeat */
        return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
}

static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
                        phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
        struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable *iop = &data->iop;
        int ret;

        if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
                    paddr >= (1ULL << data->iop.cfg.oas)))
                return -ERANGE;

        /* If no access, then nothing to do */
        if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
                return 0;

        ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd, gfp);
        /*
         * Synchronise all PTE updates for the new mapping before there's
         * a chance for anything to kick off a table walk for the new iova.
         */
        if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
                io_pgtable_tlb_flush_walk(iop, iova, size,
                                          ARM_V7S_BLOCK_SIZE(2));
        } else {
                wmb();
        }

        return ret;
}

static void arm_v7s_free_pgtable(struct io_pgtable *iop)
{
        struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
        int i;

        for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) {
                arm_v7s_iopte pte = data->pgd[i];

                if (ARM_V7S_PTE_IS_TABLE(pte, 1))
                        __arm_v7s_free_table(iopte_deref(pte, 1, data),
                                             2, data);
        }
        __arm_v7s_free_table(data->pgd, 1, data);
        kmem_cache_destroy(data->l2_tables);
        kfree(data);
}

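/*
 * Rewrite one contiguous (large page/supersection) entry as 16 individual
 * page/section entries covering the same output region. Called with
 * split_lock held; returns the PTE in its new non-contiguous form.
 */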
static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
                                        unsigned long iova, int idx, int lvl,
                                        arm_v7s_iopte *ptep)
{
        struct io_pgtable *iop = &data->iop;
        arm_v7s_iopte pte;
        size_t size = ARM_V7S_BLOCK_SIZE(lvl);
        int i;

        /* Check that we didn't lose a race to get the lock */
        pte = *ptep;
        if (!arm_v7s_pte_is_cont(pte, lvl))
                return pte;

        ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
        pte = arm_v7s_cont_to_pte(pte, lvl);
        for (i = 0; i < ARM_V7S_CONT_PAGES; i++)
                ptep[i] = pte + i * size;

        __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);

        size *= ARM_V7S_CONT_PAGES;
        io_pgtable_tlb_flush_walk(iop, iova, size, size);
        return pte;
}

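/*
 * Replace a section/supersection with a freshly-built level 2 table mapping
 * the same region minus the part being unmapped, installed atomically so a
 * concurrent walk sees either the old block or the complete new table.
 */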
static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
                                      struct iommu_iotlb_gather *gather,
                                      unsigned long iova, size_t size,
                                      arm_v7s_iopte blk_pte,
                                      arm_v7s_iopte *ptep)
{
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_v7s_iopte pte, *tablep;
        int i, unmap_idx, num_entries, num_ptes;

        tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
        if (!tablep)
                return 0; /* Bytes unmapped */

        num_ptes = ARM_V7S_PTES_PER_LVL(2);
        num_entries = size >> ARM_V7S_LVL_SHIFT(2);
        unmap_idx = ARM_V7S_LVL_IDX(iova, 2);

        pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
        if (num_entries > 1)
                pte = arm_v7s_pte_to_cont(pte, 2);

        /*
         * Carry the block's output address over into the new entries: the
         * attribute-only PTE built above has none, and "pte += size" below
         * only advances whatever base address is already encoded.
         */
        pte |= paddr_to_iopte(iopte_to_paddr(blk_pte, 1, cfg), 2, cfg);

        for (i = 0; i < num_ptes; i += num_entries, pte += size) {
                /* Unmap! */
                if (i == unmap_idx)
                        continue;

                __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
        }

        pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
        if (pte != blk_pte) {
                __arm_v7s_free_table(tablep, 2, data);

                if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
                        return 0;

                tablep = iopte_deref(pte, 1, data);
                return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
        }

        io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
        return size;
}

static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
                              struct iommu_iotlb_gather *gather,
                              unsigned long iova, size_t size, int lvl,
                              arm_v7s_iopte *ptep)
{
        arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
        struct io_pgtable *iop = &data->iop;
        int idx, i = 0, num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);

        /* Something went horribly wrong and we ran out of page table */
        if (WARN_ON(lvl > 2))
                return 0;

        idx = ARM_V7S_LVL_IDX(iova, lvl);
        ptep += idx;
        do {
                pte[i] = READ_ONCE(ptep[i]);
                if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i])))
                        return 0;
        } while (++i < num_entries);

        /*
         * If we've hit a contiguous 'large page' entry at this level, it
         * needs splitting first, unless we're unmapping the whole lot.
         *
         * For splitting, we can't rewrite 16 PTEs atomically, and since we
         * can't necessarily assume TEX remap we don't have a software bit to
         * mark live entries being split. In practice (i.e. DMA API code), we
         * will never be splitting large pages anyway, so just wrap this edge
         * case in a lock for the sake of correctness and be done with it.
         */
        if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
                unsigned long flags;

                spin_lock_irqsave(&data->split_lock, flags);
                pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
                spin_unlock_irqrestore(&data->split_lock, flags);
        }

        /* If the size matches this level, we're in the right place */
        if (num_entries) {
                size_t blk_size = ARM_V7S_BLOCK_SIZE(lvl);

                __arm_v7s_set_pte(ptep, 0, num_entries, &iop->cfg);

                for (i = 0; i < num_entries; i++) {
                        if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
                                /* Also flush any partial walks */
                                io_pgtable_tlb_flush_walk(iop, iova, blk_size,
                                                ARM_V7S_BLOCK_SIZE(lvl + 1));
                                ptep = iopte_deref(pte[i], lvl, data);
                                __arm_v7s_free_table(ptep, lvl + 1, data);
                        } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
                                /*
                                 * Order the PTE update against queueing the IOVA, to
                                 * guarantee that a flush callback from a different CPU
                                 * has observed it before the TLBIALL can be issued.
                                 */
                                smp_wmb();
                        } else {
                                io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
                        }
                        iova += blk_size;
                }
                return size;
        } else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) {
                /*
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
                return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
                                               ptep);
        }

        /* Keep on walkin' */
        ptep = iopte_deref(pte[0], lvl, data);
        return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
}

static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
                            size_t size, struct iommu_iotlb_gather *gather)
{
        struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);

        if (WARN_ON(upper_32_bits(iova)))
                return 0;

        return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
}

static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
                                        unsigned long iova)
{
        struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
        arm_v7s_iopte *ptep = data->pgd, pte;
        int lvl = 0;
        u32 mask;

        do {
                ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
                pte = READ_ONCE(*ptep);
                ptep = iopte_deref(pte, lvl, data);
        } while (ARM_V7S_PTE_IS_TABLE(pte, lvl));

        if (!ARM_V7S_PTE_IS_VALID(pte))
                return 0;

        mask = ARM_V7S_LVL_MASK(lvl);
        if (arm_v7s_pte_is_cont(pte, lvl))
                mask *= ARM_V7S_CONT_PAGES;
        return iopte_to_paddr(pte, lvl, &data->iop.cfg) | (iova & ~mask);
}

static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
                                                void *cookie)
{
        struct arm_v7s_io_pgtable *data;

        if (cfg->ias > ARM_V7S_ADDR_BITS)
                return NULL;

        if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
                return NULL;

        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_NO_PERMS |
                            IO_PGTABLE_QUIRK_TLBI_ON_MAP |
                            IO_PGTABLE_QUIRK_ARM_MTK_EXT |
                            IO_PGTABLE_QUIRK_NON_STRICT))
                return NULL;

        /* If the MTK_EXT quirk is set, the NO_PERMS quirk is also expected */
        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT &&
            !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
                return NULL;

        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return NULL;

        spin_lock_init(&data->split_lock);
        data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
                                            ARM_V7S_TABLE_SIZE(2),
                                            ARM_V7S_TABLE_SIZE(2),
                                            ARM_V7S_TABLE_SLAB_FLAGS, NULL);
        if (!data->l2_tables)
                goto out_free_data;

        data->iop.ops = (struct io_pgtable_ops) {
                .map            = arm_v7s_map,
                .unmap          = arm_v7s_unmap,
                .iova_to_phys   = arm_v7s_iova_to_phys,
        };

        /* We have to do this early for __arm_v7s_alloc_table to work... */
        data->iop.cfg = *cfg;

        /*
         * Unless the IOMMU driver indicates supersection support by
         * having SZ_16M set in the initial bitmap, they won't be used.
         */
        cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M;

        /* TCR: T0SZ=0, EAE=0 (if applicable) */
        cfg->arm_v7s_cfg.tcr = 0;

        /*
         * TEX remap: the indices used map to the closest equivalent types
         * under the non-TEX-remap interpretation of those attribute bits,
         * excepting various implementation-defined aspects of shareability.
         */
        cfg->arm_v7s_cfg.prrr = ARM_V7S_PRRR_TR(1, ARM_V7S_PRRR_TYPE_DEVICE) |
                                ARM_V7S_PRRR_TR(4, ARM_V7S_PRRR_TYPE_NORMAL) |
                                ARM_V7S_PRRR_TR(7, ARM_V7S_PRRR_TYPE_NORMAL) |
                                ARM_V7S_PRRR_DS0 | ARM_V7S_PRRR_DS1 |
                                ARM_V7S_PRRR_NS1 | ARM_V7S_PRRR_NOS(7);
        cfg->arm_v7s_cfg.nmrr = ARM_V7S_NMRR_IR(7, ARM_V7S_RGN_WBWA) |
                                ARM_V7S_NMRR_OR(7, ARM_V7S_RGN_WBWA);

        /* Looking good; allocate a pgd */
        data->pgd = __arm_v7s_alloc_table(1, GFP_KERNEL, data);
        if (!data->pgd)
                goto out_free_data;

        /* Ensure the empty pgd is visible before any actual TTBR write */
        wmb();

        /* TTBR */
        cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S |
                                (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS |
                                 ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
                                 ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
                                (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
                                 ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
        return &data->iop;

out_free_data:
        kmem_cache_destroy(data->l2_tables);
        kfree(data);
        return NULL;
}

struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
        .alloc  = arm_v7s_alloc_pgtable,
        .free   = arm_v7s_free_pgtable,
};
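
/*
 * Drivers pick up this format via the generic io-pgtable API, e.g.
 * alloc_io_pgtable_ops(ARM_V7S, &cfg, cookie), exactly as the selftest
 * below does; the ops installed above then provide map/unmap/iova_to_phys.
 */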

#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST

static struct io_pgtable_cfg *cfg_cookie __initdata;

static void __init dummy_tlb_flush_all(void *cookie)
{
        WARN_ON(cookie != cfg_cookie);
}

static void __init dummy_tlb_flush(unsigned long iova, size_t size,
                                   size_t granule, void *cookie)
{
        WARN_ON(cookie != cfg_cookie);
        WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}

static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
                                      unsigned long iova, size_t granule,
                                      void *cookie)
{
        dummy_tlb_flush(iova, granule, granule, cookie);
}

static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
        .tlb_flush_all  = dummy_tlb_flush_all,
        .tlb_flush_walk = dummy_tlb_flush,
        .tlb_add_page   = dummy_tlb_add_page,
};

#define __FAIL(ops)     ({                              \
                WARN(1, "selftest: test failed\n");     \
                selftest_running = false;               \
                -EFAULT;                                \
})

static int __init arm_v7s_do_selftests(void)
{
        struct io_pgtable_ops *ops;
        struct io_pgtable_cfg cfg = {
                .tlb = &dummy_tlb_ops,
                .oas = 32,
                .ias = 32,
                .coherent_walk = true,
                .quirks = IO_PGTABLE_QUIRK_ARM_NS,
                .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
        };
        unsigned int iova, size, iova_start;
        unsigned int i, loopnr = 0;

        selftest_running = true;

        cfg_cookie = &cfg;

        ops = alloc_io_pgtable_ops(ARM_V7S, &cfg, &cfg);
        if (!ops) {
                pr_err("selftest: failed to allocate io pgtable ops\n");
                return -EINVAL;
        }

        /*
         * Initial sanity checks.
         * Empty page tables shouldn't provide any translations.
         */
        if (ops->iova_to_phys(ops, 42))
                return __FAIL(ops);

        if (ops->iova_to_phys(ops, SZ_1G + 42))
                return __FAIL(ops);

        if (ops->iova_to_phys(ops, SZ_2G + 42))
                return __FAIL(ops);

        /*
         * Distinct mappings of different granule sizes.
         */
        iova = 0;
        for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
                size = 1UL << i;
                if (ops->map(ops, iova, iova, size, IOMMU_READ |
                                                    IOMMU_WRITE |
                                                    IOMMU_NOEXEC |
                                                    IOMMU_CACHE, GFP_KERNEL))
                        return __FAIL(ops);

                /* Overlapping mappings */
                if (!ops->map(ops, iova, iova + size, size,
                              IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL))
                        return __FAIL(ops);

                if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
                        return __FAIL(ops);

                iova += SZ_16M;
                loopnr++;
        }

        /* Partial unmap */
        i = 1;
        size = 1UL << __ffs(cfg.pgsize_bitmap);
        while (i < loopnr) {
                iova_start = i * SZ_16M;
                if (ops->unmap(ops, iova_start + size, size, NULL) != size)
                        return __FAIL(ops);

                /* Remap of partial unmap */
                if (ops->map(ops, iova_start + size, size, size, IOMMU_READ, GFP_KERNEL))
                        return __FAIL(ops);

                if (ops->iova_to_phys(ops, iova_start + size + 42)
                    != (size + 42))
                        return __FAIL(ops);
                i++;
        }

        /* Full unmap */
        iova = 0;
        for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
                size = 1UL << i;

                if (ops->unmap(ops, iova, size, NULL) != size)
                        return __FAIL(ops);

                if (ops->iova_to_phys(ops, iova + 42))
                        return __FAIL(ops);

                /* Remap full block */
                if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL))
                        return __FAIL(ops);

                if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
                        return __FAIL(ops);

                iova += SZ_16M;
        }

        free_io_pgtable_ops(ops);

        selftest_running = false;

        pr_info("self test ok\n");
        return 0;
}
subsys_initcall(arm_v7s_do_selftests);
#endif