linux/drivers/iommu/arm-smmu.c
   1/*
   2 * IOMMU API for ARM architected SMMU implementations.
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License version 2 as
   6 * published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 * GNU General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  16 *
  17 * Copyright (C) 2013 ARM Limited
  18 *
  19 * Author: Will Deacon <will.deacon@arm.com>
  20 *
  21 * This driver currently supports:
  22 *      - SMMUv1 and v2 implementations
  23 *      - Stream-matching and stream-indexing
  24 *      - v7/v8 long-descriptor format
  25 *      - Non-secure access to the SMMU
  26 *      - Context fault reporting
  27 *      - Extended Stream ID (16 bit)
  28 */
  29
  30#define pr_fmt(fmt) "arm-smmu: " fmt
  31
  32#include <linux/acpi.h>
  33#include <linux/acpi_iort.h>
  34#include <linux/atomic.h>
  35#include <linux/delay.h>
  36#include <linux/dma-iommu.h>
  37#include <linux/dma-mapping.h>
  38#include <linux/err.h>
  39#include <linux/interrupt.h>
  40#include <linux/io.h>
  41#include <linux/io-64-nonatomic-hi-lo.h>
  42#include <linux/iommu.h>
  43#include <linux/iopoll.h>
  44#include <linux/module.h>
  45#include <linux/of.h>
  46#include <linux/of_address.h>
  47#include <linux/of_device.h>
  48#include <linux/of_iommu.h>
  49#include <linux/pci.h>
  50#include <linux/platform_device.h>
  51#include <linux/slab.h>
  52#include <linux/spinlock.h>
  53
  54#include <linux/amba/bus.h>
  55
  56#include "io-pgtable.h"
  57#include "arm-smmu-regs.h"
  58
  59#define ARM_MMU500_ACTLR_CPRE           (1 << 1)
  60
  61#define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
  62#define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
  63#define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
  64
  65#define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
  66#define TLB_SPIN_COUNT                  10
  67
  68/* Maximum number of context banks per SMMU */
  69#define ARM_SMMU_MAX_CBS                128
  70
  71/* SMMU global address space */
  72#define ARM_SMMU_GR0(smmu)              ((smmu)->base)
  73#define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
  74
  75/*
  76 * SMMU global address space with conditional offset to access secure
  77 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
  78 * nsGFSYNR0: 0x450)
  79 */
  80#define ARM_SMMU_GR0_NS(smmu)                                           \
  81        ((smmu)->base +                                                 \
  82                ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
  83                        ? 0x400 : 0))
  84
  85/*
  86 * Some 64-bit registers only make sense to write atomically, but in such
  87 * cases all the data relevant to AArch32 formats lies within the lower word,
  88 * therefore this actually makes more sense than it might first appear.
  89 */
  90#ifdef CONFIG_64BIT
  91#define smmu_write_atomic_lq            writeq_relaxed
  92#else
  93#define smmu_write_atomic_lq            writel_relaxed
  94#endif
  95
  96/* Translation context bank */
  97#define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
  98
  99#define MSI_IOVA_BASE                   0x8000000
 100#define MSI_IOVA_LENGTH                 0x100000
 101
 102static int force_stage;
 103module_param(force_stage, int, S_IRUGO);
 104MODULE_PARM_DESC(force_stage,
 105        "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
 106static bool disable_bypass;
 107module_param(disable_bypass, bool, S_IRUGO);
 108MODULE_PARM_DESC(disable_bypass,
 109        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 110
 111enum arm_smmu_arch_version {
 112        ARM_SMMU_V1,
 113        ARM_SMMU_V1_64K,
 114        ARM_SMMU_V2,
 115};
 116
 117enum arm_smmu_implementation {
 118        GENERIC_SMMU,
 119        ARM_MMU500,
 120        CAVIUM_SMMUV2,
 121};
 122
 123struct arm_smmu_s2cr {
 124        struct iommu_group              *group;
 125        int                             count;
 126        enum arm_smmu_s2cr_type         type;
 127        enum arm_smmu_s2cr_privcfg      privcfg;
 128        u8                              cbndx;
 129};
 130
 131#define s2cr_init_val (struct arm_smmu_s2cr){                           \
 132        .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
 133}
 134
 135struct arm_smmu_smr {
 136        u16                             mask;
 137        u16                             id;
 138        bool                            valid;
 139};
 140
 141struct arm_smmu_cb {
 142        u64                             ttbr[2];
 143        u32                             tcr[2];
 144        u32                             mair[2];
 145        struct arm_smmu_cfg             *cfg;
 146};
 147
 148struct arm_smmu_master_cfg {
 149        struct arm_smmu_device          *smmu;
 150        s16                             smendx[];
 151};
 152#define INVALID_SMENDX                  -1
 153#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
 154#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
 155#define fwspec_smendx(fw, i) \
 156        (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
 157#define for_each_cfg_sme(fw, i, idx) \
 158        for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
 159
 160struct arm_smmu_device {
 161        struct device                   *dev;
 162
 163        void __iomem                    *base;
 164        void __iomem                    *cb_base;
 165        unsigned long                   pgshift;
 166
 167#define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
 168#define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
 169#define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
 170#define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
 171#define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
 172#define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
 173#define ARM_SMMU_FEAT_VMID16            (1 << 6)
 174#define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
 175#define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
 176#define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
 177#define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
 178#define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
 179#define ARM_SMMU_FEAT_EXIDS             (1 << 12)
 180        u32                             features;
 181
 182#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
 183        u32                             options;
 184        enum arm_smmu_arch_version      version;
 185        enum arm_smmu_implementation    model;
 186
 187        u32                             num_context_banks;
 188        u32                             num_s2_context_banks;
 189        DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
 190        struct arm_smmu_cb              *cbs;
 191        atomic_t                        irptndx;
 192
 193        u32                             num_mapping_groups;
 194        u16                             streamid_mask;
 195        u16                             smr_mask_mask;
 196        struct arm_smmu_smr             *smrs;
 197        struct arm_smmu_s2cr            *s2crs;
 198        struct mutex                    stream_map_mutex;
 199
 200        unsigned long                   va_size;
 201        unsigned long                   ipa_size;
 202        unsigned long                   pa_size;
 203        unsigned long                   pgsize_bitmap;
 204
 205        u32                             num_global_irqs;
 206        u32                             num_context_irqs;
 207        unsigned int                    *irqs;
 208
 209        u32                             cavium_id_base; /* Specific to Cavium */
 210
 211        spinlock_t                      global_sync_lock;
 212
 213        /* IOMMU core code handle */
 214        struct iommu_device             iommu;
 215};
 216
 217enum arm_smmu_context_fmt {
 218        ARM_SMMU_CTX_FMT_NONE,
 219        ARM_SMMU_CTX_FMT_AARCH64,
 220        ARM_SMMU_CTX_FMT_AARCH32_L,
 221        ARM_SMMU_CTX_FMT_AARCH32_S,
 222};
 223
 224struct arm_smmu_cfg {
 225        u8                              cbndx;
 226        u8                              irptndx;
 227        union {
 228                u16                     asid;
 229                u16                     vmid;
 230        };
 231        u32                             cbar;
 232        enum arm_smmu_context_fmt       fmt;
 233};
 234#define INVALID_IRPTNDX                 0xff
 235
 236enum arm_smmu_domain_stage {
 237        ARM_SMMU_DOMAIN_S1 = 0,
 238        ARM_SMMU_DOMAIN_S2,
 239        ARM_SMMU_DOMAIN_NESTED,
 240        ARM_SMMU_DOMAIN_BYPASS,
 241};
 242
 243struct arm_smmu_domain {
 244        struct arm_smmu_device          *smmu;
 245        struct io_pgtable_ops           *pgtbl_ops;
 246        const struct iommu_gather_ops   *tlb_ops;
 247        struct arm_smmu_cfg             cfg;
 248        enum arm_smmu_domain_stage      stage;
 249        struct mutex                    init_mutex; /* Protects smmu pointer */
 250        spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
 251        struct iommu_domain             domain;
 252};
 253
 254struct arm_smmu_option_prop {
 255        u32 opt;
 256        const char *prop;
 257};
 258
 259static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
 260
 261static bool using_legacy_binding, using_generic_binding;
 262
 263static struct arm_smmu_option_prop arm_smmu_options[] = {
 264        { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
 265        { 0, NULL},
 266};
 267
 268static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 269{
 270        return container_of(dom, struct arm_smmu_domain, domain);
 271}
 272
 273static void parse_driver_options(struct arm_smmu_device *smmu)
 274{
 275        int i = 0;
 276
 277        do {
 278                if (of_property_read_bool(smmu->dev->of_node,
 279                                                arm_smmu_options[i].prop)) {
 280                        smmu->options |= arm_smmu_options[i].opt;
 281                        dev_notice(smmu->dev, "option %s\n",
 282                                arm_smmu_options[i].prop);
 283                }
 284        } while (arm_smmu_options[++i].opt);
 285}
 286
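/*
 * For the legacy "mmu-masters" DT binding, PCI masters are described on the
 * host controller's node, so walk up to the root bus and return the bridge's
 * parent of_node; platform devices simply use their own of_node.
 */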
 287static struct device_node *dev_get_dev_node(struct device *dev)
 288{
 289        if (dev_is_pci(dev)) {
 290                struct pci_bus *bus = to_pci_dev(dev)->bus;
 291
 292                while (!pci_is_root_bus(bus))
 293                        bus = bus->parent;
 294                return of_node_get(bus->bridge->parent->of_node);
 295        }
 296
 297        return of_node_get(dev->of_node);
 298}
 299
 300static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
 301{
 302        *((__be32 *)data) = cpu_to_be32(alias);
 303        return 0; /* Continue walking */
 304}
 305
 306static int __find_legacy_master_phandle(struct device *dev, void *data)
 307{
 308        struct of_phandle_iterator *it = *(void **)data;
 309        struct device_node *np = it->node;
 310        int err;
 311
 312        of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
 313                            "#stream-id-cells", 0)
 314                if (it->node == np) {
 315                        *(void **)data = dev;
 316                        return 1;
 317                }
 318        it->node = np;
 319        return err == -ENOENT ? 0 : err;
 320}
 321
 322static struct platform_driver arm_smmu_driver;
 323static struct iommu_ops arm_smmu_ops;
 324
 325static int arm_smmu_register_legacy_master(struct device *dev,
 326                                           struct arm_smmu_device **smmu)
 327{
 328        struct device *smmu_dev;
 329        struct device_node *np;
 330        struct of_phandle_iterator it;
 331        void *data = &it;
 332        u32 *sids;
 333        __be32 pci_sid;
 334        int err;
 335
 336        np = dev_get_dev_node(dev);
 337        if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
 338                of_node_put(np);
 339                return -ENODEV;
 340        }
 341
 342        it.node = np;
 343        err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
 344                                     __find_legacy_master_phandle);
 345        smmu_dev = data;
 346        of_node_put(np);
 347        if (err == 0)
 348                return -ENODEV;
 349        if (err < 0)
 350                return err;
 351
 352        if (dev_is_pci(dev)) {
 353                /* "mmu-masters" assumes Stream ID == Requester ID */
 354                pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
 355                                       &pci_sid);
 356                it.cur = &pci_sid;
 357                it.cur_count = 1;
 358        }
 359
 360        err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
 361                                &arm_smmu_ops);
 362        if (err)
 363                return err;
 364
 365        sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
 366        if (!sids)
 367                return -ENOMEM;
 368
 369        *smmu = dev_get_drvdata(smmu_dev);
 370        of_phandle_iterator_args(&it, sids, it.cur_count);
 371        err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
 372        kfree(sids);
 373        return err;
 374}
 375
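/*
 * Allocate a free index from @map in [@start, @end) without holding a lock:
 * keep retrying the find_next_zero_bit()/test_and_set_bit() pair until we
 * win the race, or return -ENOSPC once the range is exhausted.
 */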
 376static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
 377{
 378        int idx;
 379
 380        do {
 381                idx = find_next_zero_bit(map, end, start);
 382                if (idx == end)
 383                        return -ENOSPC;
 384        } while (test_and_set_bit(idx, map));
 385
 386        return idx;
 387}
 388
 389static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 390{
 391        clear_bit(idx, map);
 392}
 393
 394/* Wait for any pending TLB invalidations to complete */
 395static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
 396                                void __iomem *sync, void __iomem *status)
 397{
 398        unsigned int spin_cnt, delay;
 399
 400        writel_relaxed(0, sync);
 401        for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
 402                for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
 403                        if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
 404                                return;
 405                        cpu_relax();
 406                }
 407                udelay(delay);
 408        }
 409        dev_err_ratelimited(smmu->dev,
 410                            "TLB sync timed out -- SMMU may be deadlocked\n");
 411}
 412
 413static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
 414{
 415        void __iomem *base = ARM_SMMU_GR0(smmu);
 416        unsigned long flags;
 417
 418        spin_lock_irqsave(&smmu->global_sync_lock, flags);
 419        __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
 420                            base + ARM_SMMU_GR0_sTLBGSTATUS);
 421        spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
 422}
 423
 424static void arm_smmu_tlb_sync_context(void *cookie)
 425{
 426        struct arm_smmu_domain *smmu_domain = cookie;
 427        struct arm_smmu_device *smmu = smmu_domain->smmu;
 428        void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
 429        unsigned long flags;
 430
 431        spin_lock_irqsave(&smmu_domain->cb_lock, flags);
 432        __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
 433                            base + ARM_SMMU_CB_TLBSTATUS);
 434        spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
 435}
 436
 437static void arm_smmu_tlb_sync_vmid(void *cookie)
 438{
 439        struct arm_smmu_domain *smmu_domain = cookie;
 440
 441        arm_smmu_tlb_sync_global(smmu_domain->smmu);
 442}
 443
 444static void arm_smmu_tlb_inv_context_s1(void *cookie)
 445{
 446        struct arm_smmu_domain *smmu_domain = cookie;
 447        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 448        void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
 449
 450        writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
 451        arm_smmu_tlb_sync_context(cookie);
 452}
 453
 454static void arm_smmu_tlb_inv_context_s2(void *cookie)
 455{
 456        struct arm_smmu_domain *smmu_domain = cookie;
 457        struct arm_smmu_device *smmu = smmu_domain->smmu;
 458        void __iomem *base = ARM_SMMU_GR0(smmu);
 459
 460        writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
 461        arm_smmu_tlb_sync_global(smmu);
 462}
 463
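/*
 * Issue one TLB invalidation per granule without waiting for completion:
 * stage 1 invalidates by VA with the ASID folded into the register value,
 * stage 2 invalidates by IPA. Callers must follow up with a TLB sync.
 */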
 464static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 465                                          size_t granule, bool leaf, void *cookie)
 466{
 467        struct arm_smmu_domain *smmu_domain = cookie;
 468        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 469        bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
 470        void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
 471
 472        if (stage1) {
 473                reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
 474
 475                if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
 476                        iova &= ~12UL;
 477                        iova |= cfg->asid;
 478                        do {
 479                                writel_relaxed(iova, reg);
 480                                iova += granule;
 481                        } while (size -= granule);
 482                } else {
 483                        iova >>= 12;
 484                        iova |= (u64)cfg->asid << 48;
 485                        do {
 486                                writeq_relaxed(iova, reg);
 487                                iova += granule >> 12;
 488                        } while (size -= granule);
 489                }
 490        } else {
 491                reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
 492                              ARM_SMMU_CB_S2_TLBIIPAS2;
 493                iova >>= 12;
 494                do {
 495                        smmu_write_atomic_lq(iova, reg);
 496                        iova += granule >> 12;
 497                } while (size -= granule);
 498        }
 499}
 500
 501/*
 502 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 503 * almost negligible, but the benefit of getting the first one in as far ahead
 504 * of the sync as possible is significant, hence we don't just make this a
  505 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
 506 */
 507static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
 508                                         size_t granule, bool leaf, void *cookie)
 509{
 510        struct arm_smmu_domain *smmu_domain = cookie;
 511        void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
 512
 513        writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
 514}
 515
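/*
 * TLB maintenance callbacks handed to io-pgtable: per-context invalidate and
 * sync for stage 1 and for stage 2 on SMMUv2, plus a VMID-wide variant for
 * stage 2 on SMMUv1, where sync falls back to the global TLBSYNC register.
 */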
 516static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
 517        .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
 518        .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
 519        .tlb_sync       = arm_smmu_tlb_sync_context,
 520};
 521
 522static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
 523        .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
 524        .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
 525        .tlb_sync       = arm_smmu_tlb_sync_context,
 526};
 527
 528static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
 529        .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
 530        .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
 531        .tlb_sync       = arm_smmu_tlb_sync_vmid,
 532};
 533
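/*
 * Context fault handler: report the faulting IOVA and syndrome for the
 * offending context bank, then clear the fault by writing back the FSR.
 */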
 534static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 535{
 536        u32 fsr, fsynr;
 537        unsigned long iova;
 538        struct iommu_domain *domain = dev;
 539        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 540        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 541        struct arm_smmu_device *smmu = smmu_domain->smmu;
 542        void __iomem *cb_base;
 543
 544        cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
 545        fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
 546
 547        if (!(fsr & FSR_FAULT))
 548                return IRQ_NONE;
 549
 550        fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
 551        iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
 552
 553        dev_err_ratelimited(smmu->dev,
 554        "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
 555                            fsr, iova, fsynr, cfg->cbndx);
 556
 557        writel(fsr, cb_base + ARM_SMMU_CB_FSR);
 558        return IRQ_HANDLED;
 559}
 560
 561static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
 562{
 563        u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
 564        struct arm_smmu_device *smmu = dev;
 565        void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
 566
 567        gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
 568        gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
 569        gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
 570        gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
 571
 572        if (!gfsr)
 573                return IRQ_NONE;
 574
 575        dev_err_ratelimited(smmu->dev,
 576                "Unexpected global fault, this could be serious\n");
 577        dev_err_ratelimited(smmu->dev,
 578                "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
 579                gfsr, gfsynr0, gfsynr1, gfsynr2);
 580
 581        writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
 582        return IRQ_HANDLED;
 583}
 584
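/*
 * Capture the TTBR/TCR/MAIR values from the io-pgtable configuration in the
 * software copy of the context bank; arm_smmu_write_context_bank() writes
 * them out to the hardware registers later.
 */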
 585static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 586                                       struct io_pgtable_cfg *pgtbl_cfg)
 587{
 588        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 589        struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
 590        bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
 591
 592        cb->cfg = cfg;
 593
 594        /* TTBCR */
 595        if (stage1) {
 596                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
 597                        cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
 598                } else {
 599                        cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
 600                        cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
 601                        cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
 602                        if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
 603                                cb->tcr[1] |= TTBCR2_AS;
 604                }
 605        } else {
 606                cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
 607        }
 608
 609        /* TTBRs */
 610        if (stage1) {
 611                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
 612                        cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
 613                        cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
 614                } else {
 615                        cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
 616                        cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
 617                        cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
 618                        cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
 619                }
 620        } else {
 621                cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
 622        }
 623
 624        /* MAIRs (stage-1 only) */
 625        if (stage1) {
 626                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
 627                        cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
 628                        cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
 629                } else {
 630                        cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
 631                        cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
 632                }
 633        }
 634}
 635
 636static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
 637{
 638        u32 reg;
 639        bool stage1;
 640        struct arm_smmu_cb *cb = &smmu->cbs[idx];
 641        struct arm_smmu_cfg *cfg = cb->cfg;
 642        void __iomem *cb_base, *gr1_base;
 643
 644        cb_base = ARM_SMMU_CB(smmu, idx);
 645
 646        /* Unassigned context banks only need disabling */
 647        if (!cfg) {
 648                writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
 649                return;
 650        }
 651
 652        gr1_base = ARM_SMMU_GR1(smmu);
 653        stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
 654
 655        /* CBA2R */
 656        if (smmu->version > ARM_SMMU_V1) {
 657                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
 658                        reg = CBA2R_RW64_64BIT;
 659                else
 660                        reg = CBA2R_RW64_32BIT;
 661                /* 16-bit VMIDs live in CBA2R */
 662                if (smmu->features & ARM_SMMU_FEAT_VMID16)
 663                        reg |= cfg->vmid << CBA2R_VMID_SHIFT;
 664
 665                writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
 666        }
 667
 668        /* CBAR */
 669        reg = cfg->cbar;
 670        if (smmu->version < ARM_SMMU_V2)
 671                reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
 672
 673        /*
 674         * Use the weakest shareability/memory types, so they are
 675         * overridden by the ttbcr/pte.
 676         */
 677        if (stage1) {
 678                reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
 679                        (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
 680        } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
 681                /* 8-bit VMIDs live in CBAR */
 682                reg |= cfg->vmid << CBAR_VMID_SHIFT;
 683        }
 684        writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
 685
 686        /*
 687         * TTBCR
 688         * We must write this before the TTBRs, since it determines the
 689         * access behaviour of some fields (in particular, ASID[15:8]).
 690         */
 691        if (stage1 && smmu->version > ARM_SMMU_V1)
 692                writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
 693        writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
 694
 695        /* TTBRs */
 696        if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
 697                writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
 698                writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
 699                writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
 700        } else {
 701                writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
 702                if (stage1)
 703                        writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
 704        }
 705
 706        /* MAIRs (stage-1 only) */
 707        if (stage1) {
 708                writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
 709                writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
 710        }
 711
 712        /* SCTLR */
 713        reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
 714        if (stage1)
 715                reg |= SCTLR_S1_ASIDPNE;
 716        if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
 717                reg |= SCTLR_E;
 718
 719        writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
 720}
 721
 722static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 723                                        struct arm_smmu_device *smmu)
 724{
 725        int irq, start, ret = 0;
 726        unsigned long ias, oas;
 727        struct io_pgtable_ops *pgtbl_ops;
 728        struct io_pgtable_cfg pgtbl_cfg;
 729        enum io_pgtable_fmt fmt;
 730        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 731        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 732
 733        mutex_lock(&smmu_domain->init_mutex);
 734        if (smmu_domain->smmu)
 735                goto out_unlock;
 736
 737        if (domain->type == IOMMU_DOMAIN_IDENTITY) {
 738                smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
 739                smmu_domain->smmu = smmu;
 740                goto out_unlock;
 741        }
 742
 743        /*
 744         * Mapping the requested stage onto what we support is surprisingly
 745         * complicated, mainly because the spec allows S1+S2 SMMUs without
 746         * support for nested translation. That means we end up with the
 747         * following table:
 748         *
 749         * Requested        Supported        Actual
 750         *     S1               N              S1
 751         *     S1             S1+S2            S1
 752         *     S1               S2             S2
 753         *     S1               S1             S1
 754         *     N                N              N
 755         *     N              S1+S2            S2
 756         *     N                S2             S2
 757         *     N                S1             S1
 758         *
 759         * Note that you can't actually request stage-2 mappings.
 760         */
 761        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
 762                smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
 763        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
 764                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
 765
 766        /*
 767         * Choosing a suitable context format is even more fiddly. Until we
 768         * grow some way for the caller to express a preference, and/or move
 769         * the decision into the io-pgtable code where it arguably belongs,
 770         * just aim for the closest thing to the rest of the system, and hope
 771         * that the hardware isn't esoteric enough that we can't assume AArch64
 772         * support to be a superset of AArch32 support...
 773         */
 774        if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
 775                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
 776        if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
 777            !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
 778            (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
 779            (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
 780                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
 781        if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
 782            (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
 783                               ARM_SMMU_FEAT_FMT_AARCH64_16K |
 784                               ARM_SMMU_FEAT_FMT_AARCH64_4K)))
 785                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
 786
 787        if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
 788                ret = -EINVAL;
 789                goto out_unlock;
 790        }
 791
 792        switch (smmu_domain->stage) {
 793        case ARM_SMMU_DOMAIN_S1:
 794                cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
 795                start = smmu->num_s2_context_banks;
 796                ias = smmu->va_size;
 797                oas = smmu->ipa_size;
 798                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
 799                        fmt = ARM_64_LPAE_S1;
 800                } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
 801                        fmt = ARM_32_LPAE_S1;
 802                        ias = min(ias, 32UL);
 803                        oas = min(oas, 40UL);
 804                } else {
 805                        fmt = ARM_V7S;
 806                        ias = min(ias, 32UL);
 807                        oas = min(oas, 32UL);
 808                }
 809                smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
 810                break;
 811        case ARM_SMMU_DOMAIN_NESTED:
 812                /*
 813                 * We will likely want to change this if/when KVM gets
 814                 * involved.
 815                 */
 816        case ARM_SMMU_DOMAIN_S2:
 817                cfg->cbar = CBAR_TYPE_S2_TRANS;
 818                start = 0;
 819                ias = smmu->ipa_size;
 820                oas = smmu->pa_size;
 821                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
 822                        fmt = ARM_64_LPAE_S2;
 823                } else {
 824                        fmt = ARM_32_LPAE_S2;
 825                        ias = min(ias, 40UL);
 826                        oas = min(oas, 40UL);
 827                }
 828                if (smmu->version == ARM_SMMU_V2)
 829                        smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
 830                else
 831                        smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
 832                break;
 833        default:
 834                ret = -EINVAL;
 835                goto out_unlock;
 836        }
 837        ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
 838                                      smmu->num_context_banks);
 839        if (ret < 0)
 840                goto out_unlock;
 841
 842        cfg->cbndx = ret;
 843        if (smmu->version < ARM_SMMU_V2) {
 844                cfg->irptndx = atomic_inc_return(&smmu->irptndx);
 845                cfg->irptndx %= smmu->num_context_irqs;
 846        } else {
 847                cfg->irptndx = cfg->cbndx;
 848        }
 849
 850        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
 851                cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
 852        else
 853                cfg->asid = cfg->cbndx + smmu->cavium_id_base;
 854
 855        pgtbl_cfg = (struct io_pgtable_cfg) {
 856                .pgsize_bitmap  = smmu->pgsize_bitmap,
 857                .ias            = ias,
 858                .oas            = oas,
 859                .tlb            = smmu_domain->tlb_ops,
 860                .iommu_dev      = smmu->dev,
 861        };
 862
 863        if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
 864                pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
 865
 866        smmu_domain->smmu = smmu;
 867        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 868        if (!pgtbl_ops) {
 869                ret = -ENOMEM;
 870                goto out_clear_smmu;
 871        }
 872
 873        /* Update the domain's page sizes to reflect the page table format */
 874        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
 875        domain->geometry.aperture_end = (1UL << ias) - 1;
 876        domain->geometry.force_aperture = true;
 877
 878        /* Initialise the context bank with our page table cfg */
 879        arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
 880        arm_smmu_write_context_bank(smmu, cfg->cbndx);
 881
 882        /*
 883         * Request context fault interrupt. Do this last to avoid the
 884         * handler seeing a half-initialised domain state.
 885         */
 886        irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
 887        ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
 888                               IRQF_SHARED, "arm-smmu-context-fault", domain);
 889        if (ret < 0) {
 890                dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
 891                        cfg->irptndx, irq);
 892                cfg->irptndx = INVALID_IRPTNDX;
 893        }
 894
 895        mutex_unlock(&smmu_domain->init_mutex);
 896
 897        /* Publish page table ops for map/unmap */
 898        smmu_domain->pgtbl_ops = pgtbl_ops;
 899        return 0;
 900
 901out_clear_smmu:
 902        smmu_domain->smmu = NULL;
 903out_unlock:
 904        mutex_unlock(&smmu_domain->init_mutex);
 905        return ret;
 906}
 907
 908static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 909{
 910        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 911        struct arm_smmu_device *smmu = smmu_domain->smmu;
 912        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 913        int irq;
 914
 915        if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
 916                return;
 917
 918        /*
  919         * Disable the context bank, then release its interrupt and free
  920         * the page tables and context bank index.
 921         */
 922        smmu->cbs[cfg->cbndx].cfg = NULL;
 923        arm_smmu_write_context_bank(smmu, cfg->cbndx);
 924
 925        if (cfg->irptndx != INVALID_IRPTNDX) {
 926                irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
 927                devm_free_irq(smmu->dev, irq, domain);
 928        }
 929
 930        free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 931        __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
 932}
 933
 934static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 935{
 936        struct arm_smmu_domain *smmu_domain;
 937
 938        if (type != IOMMU_DOMAIN_UNMANAGED &&
 939            type != IOMMU_DOMAIN_DMA &&
 940            type != IOMMU_DOMAIN_IDENTITY)
 941                return NULL;
 942        /*
 943         * Allocate the domain and initialise some of its data structures.
 944         * We can't really do anything meaningful until we've added a
 945         * master.
 946         */
 947        smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
 948        if (!smmu_domain)
 949                return NULL;
 950
 951        if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
 952            iommu_get_dma_cookie(&smmu_domain->domain))) {
 953                kfree(smmu_domain);
 954                return NULL;
 955        }
 956
 957        mutex_init(&smmu_domain->init_mutex);
 958        spin_lock_init(&smmu_domain->cb_lock);
 959
 960        return &smmu_domain->domain;
 961}
 962
 963static void arm_smmu_domain_free(struct iommu_domain *domain)
 964{
 965        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 966
 967        /*
 968         * Free the domain resources. We assume that all devices have
 969         * already been detached.
 970         */
 971        iommu_put_dma_cookie(domain);
 972        arm_smmu_destroy_domain_context(domain);
 973        kfree(smmu_domain);
 974}
 975
 976static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
 977{
 978        struct arm_smmu_smr *smr = smmu->smrs + idx;
 979        u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
 980
 981        if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
 982                reg |= SMR_VALID;
 983        writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
 984}
 985
 986static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
 987{
 988        struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
 989        u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
 990                  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
 991                  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
 992
 993        if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
 994            smmu->smrs[idx].valid)
 995                reg |= S2CR_EXIDVALID;
 996        writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
 997}
 998
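/* Sync both the shadow S2CR and (if stream matching is in use) SMR for @idx */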
 999static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1000{
1001        arm_smmu_write_s2cr(smmu, idx);
1002        if (smmu->smrs)
1003                arm_smmu_write_smr(smmu, idx);
1004}
1005
1006/*
1007 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1008 * should be called after sCR0 is written.
1009 */
1010static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1011{
1012        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1013        u32 smr;
1014
1015        if (!smmu->smrs)
1016                return;
1017
1018        /*
1019         * SMR.ID bits may not be preserved if the corresponding MASK
1020         * bits are set, so check each one separately. We can reject
1021         * masters later if they try to claim IDs outside these masks.
1022         */
1023        smr = smmu->streamid_mask << SMR_ID_SHIFT;
1024        writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1025        smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1026        smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1027
1028        smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1029        writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1030        smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1031        smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1032}
1033
1034static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1035{
1036        struct arm_smmu_smr *smrs = smmu->smrs;
1037        int i, free_idx = -ENOSPC;
1038
1039        /* Stream indexing is blissfully easy */
1040        if (!smrs)
1041                return id;
1042
1043        /* Validating SMRs is... less so */
1044        for (i = 0; i < smmu->num_mapping_groups; ++i) {
1045                if (!smrs[i].valid) {
1046                        /*
1047                         * Note the first free entry we come across, which
1048                         * we'll claim in the end if nothing else matches.
1049                         */
1050                        if (free_idx < 0)
1051                                free_idx = i;
1052                        continue;
1053                }
1054                /*
1055                 * If the new entry is _entirely_ matched by an existing entry,
1056                 * then reuse that, with the guarantee that there also cannot
1057                 * be any subsequent conflicting entries. In normal use we'd
1058                 * expect simply identical entries for this case, but there's
1059                 * no harm in accommodating the generalisation.
1060                 */
1061                if ((mask & smrs[i].mask) == mask &&
1062                    !((id ^ smrs[i].id) & ~smrs[i].mask))
1063                        return i;
1064                /*
1065                 * If the new entry has any other overlap with an existing one,
1066                 * though, then there always exists at least one stream ID
1067                 * which would cause a conflict, and we can't allow that risk.
1068                 */
1069                if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1070                        return -EINVAL;
1071        }
1072
1073        return free_idx;
1074}
1075
1076static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1077{
1078        if (--smmu->s2crs[idx].count)
1079                return false;
1080
1081        smmu->s2crs[idx] = s2cr_init_val;
1082        if (smmu->smrs)
1083                smmu->smrs[idx].valid = false;
1084
1085        return true;
1086}
1087
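/*
 * Reserve stream map entries for all of a master's stream IDs under
 * stream_map_mutex, and only program the hardware once the whole set (and
 * the device's group) has been set up, unwinding on any failure.
 */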
1088static int arm_smmu_master_alloc_smes(struct device *dev)
1089{
1090        struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1091        struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1092        struct arm_smmu_device *smmu = cfg->smmu;
1093        struct arm_smmu_smr *smrs = smmu->smrs;
1094        struct iommu_group *group;
1095        int i, idx, ret;
1096
1097        mutex_lock(&smmu->stream_map_mutex);
1098        /* Figure out a viable stream map entry allocation */
1099        for_each_cfg_sme(fwspec, i, idx) {
1100                u16 sid = fwspec->ids[i];
1101                u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1102
1103                if (idx != INVALID_SMENDX) {
1104                        ret = -EEXIST;
1105                        goto out_err;
1106                }
1107
1108                ret = arm_smmu_find_sme(smmu, sid, mask);
1109                if (ret < 0)
1110                        goto out_err;
1111
1112                idx = ret;
1113                if (smrs && smmu->s2crs[idx].count == 0) {
1114                        smrs[idx].id = sid;
1115                        smrs[idx].mask = mask;
1116                        smrs[idx].valid = true;
1117                }
1118                smmu->s2crs[idx].count++;
1119                cfg->smendx[i] = (s16)idx;
1120        }
1121
1122        group = iommu_group_get_for_dev(dev);
1123        if (!group)
1124                group = ERR_PTR(-ENOMEM);
1125        if (IS_ERR(group)) {
1126                ret = PTR_ERR(group);
1127                goto out_err;
1128        }
1129        iommu_group_put(group);
1130
1131        /* It worked! Now, poke the actual hardware */
1132        for_each_cfg_sme(fwspec, i, idx) {
1133                arm_smmu_write_sme(smmu, idx);
1134                smmu->s2crs[idx].group = group;
1135        }
1136
1137        mutex_unlock(&smmu->stream_map_mutex);
1138        return 0;
1139
1140out_err:
1141        while (i--) {
1142                arm_smmu_free_sme(smmu, cfg->smendx[i]);
1143                cfg->smendx[i] = INVALID_SMENDX;
1144        }
1145        mutex_unlock(&smmu->stream_map_mutex);
1146        return ret;
1147}
1148
1149static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1150{
1151        struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1152        struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1153        int i, idx;
1154
1155        mutex_lock(&smmu->stream_map_mutex);
1156        for_each_cfg_sme(fwspec, i, idx) {
1157                if (arm_smmu_free_sme(smmu, idx))
1158                        arm_smmu_write_sme(smmu, idx);
1159                cfg->smendx[i] = INVALID_SMENDX;
1160        }
1161        mutex_unlock(&smmu->stream_map_mutex);
1162}
1163
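/*
 * Point every stream map entry used by this master at the domain's context
 * bank, or at bypass for identity domains, skipping entries that already
 * have the desired type and context bank index.
 */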
1164static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1165                                      struct iommu_fwspec *fwspec)
1166{
1167        struct arm_smmu_device *smmu = smmu_domain->smmu;
1168        struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1169        u8 cbndx = smmu_domain->cfg.cbndx;
1170        enum arm_smmu_s2cr_type type;
1171        int i, idx;
1172
1173        if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1174                type = S2CR_TYPE_BYPASS;
1175        else
1176                type = S2CR_TYPE_TRANS;
1177
1178        for_each_cfg_sme(fwspec, i, idx) {
1179                if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1180                        continue;
1181
1182                s2cr[idx].type = type;
1183                s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1184                s2cr[idx].cbndx = cbndx;
1185                arm_smmu_write_s2cr(smmu, idx);
1186        }
1187        return 0;
1188}
1189
1190static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1191{
1192        int ret;
1193        struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1194        struct arm_smmu_device *smmu;
1195        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1196
1197        if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1198                dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1199                return -ENXIO;
1200        }
1201
1202        /*
1203         * FIXME: The arch/arm DMA API code tries to attach devices to its own
1204         * domains between of_xlate() and add_device() - we have no way to cope
1205         * with that, so until ARM gets converted to rely on groups and default
1206         * domains, just say no (but more politely than by dereferencing NULL).
1207         * This should be at least a WARN_ON once that's sorted.
1208         */
1209        if (!fwspec->iommu_priv)
1210                return -ENODEV;
1211
1212        smmu = fwspec_smmu(fwspec);
1213        /* Ensure that the domain is finalised */
1214        ret = arm_smmu_init_domain_context(domain, smmu);
1215        if (ret < 0)
1216                return ret;
1217
1218        /*
1219         * Sanity check the domain. We don't support domains across
1220         * different SMMUs.
1221         */
1222        if (smmu_domain->smmu != smmu) {
1223                dev_err(dev,
1224                        "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1225                        dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1226                return -EINVAL;
1227        }
1228
1229        /* Looks ok, so add the device to the domain */
1230        return arm_smmu_domain_add_master(smmu_domain, fwspec);
1231}
1232
1233static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1234                        phys_addr_t paddr, size_t size, int prot)
1235{
1236        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1237
1238        if (!ops)
1239                return -ENODEV;
1240
1241        return ops->map(ops, iova, paddr, size, prot);
1242}
1243
1244static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1245                             size_t size)
1246{
1247        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1248
1249        if (!ops)
1250                return 0;
1251
1252        return ops->unmap(ops, iova, size);
1253}
1254
1255static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1256{
1257        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1258
1259        if (smmu_domain->tlb_ops)
1260                smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1261}
1262
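/*
 * Translate an IOVA via the hardware ATS1PR address translation operation,
 * falling back to a software page table walk if the ATSR poll times out.
 */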
1263static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1264                                              dma_addr_t iova)
1265{
1266        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1267        struct arm_smmu_device *smmu = smmu_domain->smmu;
1268        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 1269        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1270        struct device *dev = smmu->dev;
1271        void __iomem *cb_base;
1272        u32 tmp;
1273        u64 phys;
1274        unsigned long va, flags;
1275
1276        cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1277
1278        spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1279        /* ATS1 registers can only be written atomically */
1280        va = iova & ~0xfffUL;
1281        if (smmu->version == ARM_SMMU_V2)
1282                smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1283        else /* Register is only 32-bit in v1 */
1284                writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1285
1286        if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1287                                      !(tmp & ATSR_ACTIVE), 5, 50)) {
1288                spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1289                dev_err(dev,
1290                        "iova to phys timed out on %pad. Falling back to software table walk.\n",
1291                        &iova);
1292                return ops->iova_to_phys(ops, iova);
1293        }
1294
1295        phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1296        spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1297        if (phys & CB_PAR_F) {
1298                dev_err(dev, "translation fault!\n");
1299                dev_err(dev, "PAR = 0x%llx\n", phys);
1300                return 0;
1301        }
1302
1303        return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1304}
1305
1306static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1307                                        dma_addr_t iova)
1308{
1309        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1310        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1311
1312        if (domain->type == IOMMU_DOMAIN_IDENTITY)
1313                return iova;
1314
1315        if (!ops)
1316                return 0;
1317
1318        if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1319                        smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1320                return arm_smmu_iova_to_phys_hard(domain, iova);
1321
1322        return ops->iova_to_phys(ops, iova);
1323}
1324
1325static bool arm_smmu_capable(enum iommu_cap cap)
1326{
1327        switch (cap) {
1328        case IOMMU_CAP_CACHE_COHERENCY:
1329                /*
1330                 * Return true here as the SMMU can always send out coherent
1331                 * requests.
1332                 */
1333                return true;
1334        case IOMMU_CAP_NOEXEC:
1335                return true;
1336        default:
1337                return false;
1338        }
1339}
1340
1341static int arm_smmu_match_node(struct device *dev, void *data)
1342{
1343        return dev->fwnode == data;
1344}
1345
1346static
1347struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1348{
1349        struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1350                                                fwnode, arm_smmu_match_node);
1351        put_device(dev);
1352        return dev ? dev_get_drvdata(dev) : NULL;
1353}
1354
1355static int arm_smmu_add_device(struct device *dev)
1356{
1357        struct arm_smmu_device *smmu;
1358        struct arm_smmu_master_cfg *cfg;
1359        struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1360        int i, ret;
1361
1362        if (using_legacy_binding) {
1363                ret = arm_smmu_register_legacy_master(dev, &smmu);
1364
1365                /*
 1366                 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
 1367                 * will allocate/initialise a new one, so update our local fwspec pointer
 1368                 * for later use.
1369                 */
1370                fwspec = dev->iommu_fwspec;
1371                if (ret)
1372                        goto out_free;
1373        } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1374                smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1375        } else {
1376                return -ENODEV;
1377        }
1378
1379        ret = -EINVAL;
1380        for (i = 0; i < fwspec->num_ids; i++) {
1381                u16 sid = fwspec->ids[i];
1382                u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1383
1384                if (sid & ~smmu->streamid_mask) {
1385                        dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1386                                sid, smmu->streamid_mask);
1387                        goto out_free;
1388                }
1389                if (mask & ~smmu->smr_mask_mask) {
1390                        dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1391                                mask, smmu->smr_mask_mask);
1392                        goto out_free;
1393                }
1394        }
1395
1396        ret = -ENOMEM;
1397        cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1398                      GFP_KERNEL);
1399        if (!cfg)
1400                goto out_free;
1401
1402        cfg->smmu = smmu;
1403        fwspec->iommu_priv = cfg;
1404        while (i--)
1405                cfg->smendx[i] = INVALID_SMENDX;
1406
1407        ret = arm_smmu_master_alloc_smes(dev);
1408        if (ret)
1409                goto out_cfg_free;
1410
1411        iommu_device_link(&smmu->iommu, dev);
1412
1413        return 0;
1414
1415out_cfg_free:
1416        kfree(cfg);
1417out_free:
1418        iommu_fwspec_free(dev);
1419        return ret;
1420}
1421
1422static void arm_smmu_remove_device(struct device *dev)
1423{
1424        struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1425        struct arm_smmu_master_cfg *cfg;
1426        struct arm_smmu_device *smmu;
1427
1428
1429        if (!fwspec || fwspec->ops != &arm_smmu_ops)
1430                return;
1431
1432        cfg  = fwspec->iommu_priv;
1433        smmu = cfg->smmu;
1434
1435        iommu_device_unlink(&smmu->iommu, dev);
1436        arm_smmu_master_free_smes(fwspec);
1437        iommu_group_remove_device(dev);
1438        kfree(fwspec->iommu_priv);
1439        iommu_fwspec_free(dev);
1440}
1441
1442static struct iommu_group *arm_smmu_device_group(struct device *dev)
1443{
1444        struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1445        struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1446        struct iommu_group *group = NULL;
1447        int i, idx;
1448
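            /*
             * Every stream mapping entry already claimed by this device must
             * point at the same group (or at no group yet); bail out if the
             * existing assignments disagree.
             */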
1449        for_each_cfg_sme(fwspec, i, idx) {
1450                if (group && smmu->s2crs[idx].group &&
1451                    group != smmu->s2crs[idx].group)
1452                        return ERR_PTR(-EINVAL);
1453
1454                group = smmu->s2crs[idx].group;
1455        }
1456
1457        if (group)
1458                return iommu_group_ref_get(group);
1459
1460        if (dev_is_pci(dev))
1461                group = pci_device_group(dev);
1462        else
1463                group = generic_device_group(dev);
1464
1465        return group;
1466}
1467
1468static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1469                                    enum iommu_attr attr, void *data)
1470{
1471        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1472
1473        if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1474                return -EINVAL;
1475
1476        switch (attr) {
1477        case DOMAIN_ATTR_NESTING:
1478                *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1479                return 0;
1480        default:
1481                return -ENODEV;
1482        }
1483}
1484
1485static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1486                                    enum iommu_attr attr, void *data)
1487{
1488        int ret = 0;
1489        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1490
1491        if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1492                return -EINVAL;
1493
1494        mutex_lock(&smmu_domain->init_mutex);
1495
1496        switch (attr) {
1497        case DOMAIN_ATTR_NESTING:
1498                if (smmu_domain->smmu) {
1499                        ret = -EPERM;
1500                        goto out_unlock;
1501                }
1502
1503                if (*(int *)data)
1504                        smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1505                else
1506                        smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1507
1508                break;
1509        default:
1510                ret = -ENODEV;
1511        }
1512
1513out_unlock:
1514        mutex_unlock(&smmu_domain->init_mutex);
1515        return ret;
1516}
1517
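    /*
     * Translate a generic-binding "iommus" specifier into a single 32-bit
     * fwid: args[0] is the Stream ID, and either args[1] (for two-cell
     * specifiers) or the SMMU's "stream-match-mask" property supplies an
     * SMR mask shifted up by SMR_MASK_SHIFT, e.g. a specifier of
     * <0x400 0x7f80> yields fwid == 0x400 | (0x7f80 << SMR_MASK_SHIFT).
     */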
1518static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1519{
1520        u32 mask, fwid = 0;
1521
1522        if (args->args_count > 0)
1523                fwid |= (u16)args->args[0];
1524
1525        if (args->args_count > 1)
1526                fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1527        else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1528                fwid |= (u16)mask << SMR_MASK_SHIFT;
1529
1530        return iommu_fwspec_add_ids(dev, &fwid, 1);
1531}
1532
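    /*
     * Report reserved IOVA ranges: a software-managed MSI window of
     * MSI_IOVA_LENGTH bytes at MSI_IOVA_BASE, plus anything that
     * iommu_dma_get_resv_regions() knows about for this device.
     */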
1533static void arm_smmu_get_resv_regions(struct device *dev,
1534                                      struct list_head *head)
1535{
1536        struct iommu_resv_region *region;
1537        int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1538
1539        region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1540                                         prot, IOMMU_RESV_SW_MSI);
1541        if (!region)
1542                return;
1543
1544        list_add_tail(&region->list, head);
1545
1546        iommu_dma_get_resv_regions(dev, head);
1547}
1548
1549static void arm_smmu_put_resv_regions(struct device *dev,
1550                                      struct list_head *head)
1551{
1552        struct iommu_resv_region *entry, *next;
1553
1554        list_for_each_entry_safe(entry, next, head, list)
1555                kfree(entry);
1556}
1557
1558static struct iommu_ops arm_smmu_ops = {
1559        .capable                = arm_smmu_capable,
1560        .domain_alloc           = arm_smmu_domain_alloc,
1561        .domain_free            = arm_smmu_domain_free,
1562        .attach_dev             = arm_smmu_attach_dev,
1563        .map                    = arm_smmu_map,
1564        .unmap                  = arm_smmu_unmap,
1565        .map_sg                 = default_iommu_map_sg,
1566        .flush_iotlb_all        = arm_smmu_iotlb_sync,
1567        .iotlb_sync             = arm_smmu_iotlb_sync,
1568        .iova_to_phys           = arm_smmu_iova_to_phys,
1569        .add_device             = arm_smmu_add_device,
1570        .remove_device          = arm_smmu_remove_device,
1571        .device_group           = arm_smmu_device_group,
1572        .domain_get_attr        = arm_smmu_domain_get_attr,
1573        .domain_set_attr        = arm_smmu_domain_set_attr,
1574        .of_xlate               = arm_smmu_of_xlate,
1575        .get_resv_regions       = arm_smmu_get_resv_regions,
1576        .put_resv_regions       = arm_smmu_put_resv_regions,
1577        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1578};
1579
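    /*
     * Put the SMMU into a known-good state: clear recorded global and
     * context faults, reset every stream mapping entry and context bank,
     * invalidate the TLBs and finally enable the client port with the
     * requested bypass/fault-reporting behaviour.
     */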
1580static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1581{
1582        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1583        int i;
1584        u32 reg, major;
1585
1586        /* clear global FSR */
1587        reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1588        writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1589
1590        /*
1591         * Reset stream mapping groups: Initial values mark all SMRn as
1592         * invalid and all S2CRn as bypass unless overridden.
1593         */
1594        for (i = 0; i < smmu->num_mapping_groups; ++i)
1595                arm_smmu_write_sme(smmu, i);
1596
1597        if (smmu->model == ARM_MMU500) {
1598                /*
1599                 * Before clearing ARM_MMU500_ACTLR_CPRE, the CACHE_LOCK
1600                 * bit of ACR must be cleared first; note that CACHE_LOCK
1601                 * is only present in MMU-500 r2 and onwards.
1602                 */
1603                reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1604                major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1605                reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1606                if (major >= 2)
1607                        reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1608                /*
1609                 * Allow unmatched Stream IDs to allocate bypass
1610                 * TLB entries for reduced latency.
1611                 */
1612                reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1613                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1614        }
1615
1616        /* Make sure all context banks are disabled and clear CB_FSR */
1617        for (i = 0; i < smmu->num_context_banks; ++i) {
1618                void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1619
1620                arm_smmu_write_context_bank(smmu, i);
1621                writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1622                /*
1623                 * Disable MMU-500's not-particularly-beneficial next-page
1624                 * prefetcher for the sake of errata #841119 and #826419.
1625                 */
1626                if (smmu->model == ARM_MMU500) {
1627                        reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1628                        reg &= ~ARM_MMU500_ACTLR_CPRE;
1629                        writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1630                }
1631        }
1632
1633        /* Invalidate the TLB, just in case */
1634        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1635        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1636
1637        reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1638
1639        /* Enable fault reporting */
1640        reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1641
1642        /* Disable TLB broadcasting. */
1643        reg |= (sCR0_VMIDPNE | sCR0_PTM);
1644
1645        /* Enable client access, handling unmatched streams as appropriate */
1646        reg &= ~sCR0_CLIENTPD;
1647        if (disable_bypass)
1648                reg |= sCR0_USFCFG;
1649        else
1650                reg &= ~sCR0_USFCFG;
1651
1652        /* Disable forced broadcasting */
1653        reg &= ~sCR0_FB;
1654
1655        /* Don't upgrade barriers */
1656        reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1657
1658        if (smmu->features & ARM_SMMU_FEAT_VMID16)
1659                reg |= sCR0_VMID16EN;
1660
1661        if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1662                reg |= sCR0_EXIDENABLE;
1663
1664        /* Push the button */
1665        arm_smmu_tlb_sync_global(smmu);
1666        writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1667}
1668
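    /* Decode the address-size field encodings used by ID2 (IAS/OAS/UBS) */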
1669static int arm_smmu_id_size_to_bits(int size)
1670{
1671        switch (size) {
1672        case 0:
1673                return 32;
1674        case 1:
1675                return 36;
1676        case 2:
1677                return 40;
1678        case 3:
1679                return 42;
1680        case 4:
1681                return 44;
1682        case 5:
1683        default:
1684                return 48;
1685        }
1686}
1687
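    /*
     * Probe the global ID registers (ID0..ID2) to discover this SMMU's
     * translation stages, stream mapping resources, context banks and
     * supported address sizes/page-table formats.
     */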
1688static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1689{
1690        unsigned long size;
1691        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1692        u32 id;
1693        bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1694        int i;
1695
1696        dev_notice(smmu->dev, "probing hardware configuration...\n");
1697        dev_notice(smmu->dev, "SMMUv%d with:\n",
1698                        smmu->version == ARM_SMMU_V2 ? 2 : 1);
1699
1700        /* ID0 */
1701        id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1702
1703        /* Restrict available stages based on module parameter */
1704        if (force_stage == 1)
1705                id &= ~(ID0_S2TS | ID0_NTS);
1706        else if (force_stage == 2)
1707                id &= ~(ID0_S1TS | ID0_NTS);
1708
1709        if (id & ID0_S1TS) {
1710                smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1711                dev_notice(smmu->dev, "\tstage 1 translation\n");
1712        }
1713
1714        if (id & ID0_S2TS) {
1715                smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1716                dev_notice(smmu->dev, "\tstage 2 translation\n");
1717        }
1718
1719        if (id & ID0_NTS) {
1720                smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1721                dev_notice(smmu->dev, "\tnested translation\n");
1722        }
1723
1724        if (!(smmu->features &
1725                (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1726                dev_err(smmu->dev, "\tno translation support!\n");
1727                return -ENODEV;
1728        }
1729
1730        if ((id & ID0_S1TS) &&
1731                ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1732                smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1733                dev_notice(smmu->dev, "\taddress translation ops\n");
1734        }
1735
1736        /*
1737         * In order for DMA API calls to work properly, we must defer to what
1738         * the FW says about coherency, regardless of what the hardware claims.
1739         * Fortunately, this also opens up a workaround for systems where the
1740         * ID register value has ended up configured incorrectly.
1741         */
1742        cttw_reg = !!(id & ID0_CTTW);
1743        if (cttw_fw || cttw_reg)
1744                dev_notice(smmu->dev, "\t%scoherent table walk\n",
1745                           cttw_fw ? "" : "non-");
1746        if (cttw_fw != cttw_reg)
1747                dev_notice(smmu->dev,
1748                           "\t(IDR0.CTTW overridden by FW configuration)\n");
1749
1750        /* Max. number of entries we have for stream matching/indexing */
1751        if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1752                smmu->features |= ARM_SMMU_FEAT_EXIDS;
1753                size = 1 << 16;
1754        } else {
1755                size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1756        }
1757        smmu->streamid_mask = size - 1;
1758        if (id & ID0_SMS) {
1759                smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1760                size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1761                if (size == 0) {
1762                        dev_err(smmu->dev,
1763                                "stream-matching supported, but no SMRs present!\n");
1764                        return -ENODEV;
1765                }
1766
1767                /* Zero-initialised to mark as invalid */
1768                smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1769                                          GFP_KERNEL);
1770                if (!smmu->smrs)
1771                        return -ENOMEM;
1772
1773                dev_notice(smmu->dev,
1774                           "\tstream matching with %lu register groups\n", size);
1775        }
1776        /* s2cr->type == 0 means translation, so initialise explicitly */
1777        smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1778                                         GFP_KERNEL);
1779        if (!smmu->s2crs)
1780                return -ENOMEM;
1781        for (i = 0; i < size; i++)
1782                smmu->s2crs[i] = s2cr_init_val;
1783
1784        smmu->num_mapping_groups = size;
1785        mutex_init(&smmu->stream_map_mutex);
1786        spin_lock_init(&smmu->global_sync_lock);
1787
1788        if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1789                smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1790                if (!(id & ID0_PTFS_NO_AARCH32S))
1791                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1792        }
1793
1794        /* ID1 */
1795        id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1796        smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1797
1798        /* Check for size mismatch of SMMU address space from mapped region */
1799        size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1800        size <<= smmu->pgshift;
1801        if (smmu->cb_base != gr0_base + size)
1802                dev_warn(smmu->dev,
1803                        "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1804                        size * 2, (smmu->cb_base - gr0_base) * 2);
1805
1806        smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1807        smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1808        if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1809                dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1810                return -ENODEV;
1811        }
1812        dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1813                   smmu->num_context_banks, smmu->num_s2_context_banks);
1814        /*
1815         * Cavium CN88xx erratum #27704.
1816         * Ensure ASID and VMID allocation is unique across all SMMUs in
1817         * the system.
1818         */
1819        if (smmu->model == CAVIUM_SMMUV2) {
1820                smmu->cavium_id_base =
1821                        atomic_add_return(smmu->num_context_banks,
1822                                          &cavium_smmu_context_count);
1823                smmu->cavium_id_base -= smmu->num_context_banks;
1824                dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1825        }
1826        smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1827                                 sizeof(*smmu->cbs), GFP_KERNEL);
1828        if (!smmu->cbs)
1829                return -ENOMEM;
1830
1831        /* ID2 */
1832        id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1833        size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1834        smmu->ipa_size = size;
1835
1836        /* The output mask is also applied for bypass */
1837        size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1838        smmu->pa_size = size;
1839
1840        if (id & ID2_VMID16)
1841                smmu->features |= ARM_SMMU_FEAT_VMID16;
1842
1843        /*
1844         * What the page table walker can address actually depends on which
1845         * descriptor format is in use, but since a) we don't know that yet,
1846         * and b) it can vary per context bank, this will have to do...
1847         */
1848        if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1849                dev_warn(smmu->dev,
1850                         "failed to set DMA mask for table walker\n");
1851
1852        if (smmu->version < ARM_SMMU_V2) {
1853                smmu->va_size = smmu->ipa_size;
1854                if (smmu->version == ARM_SMMU_V1_64K)
1855                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1856        } else {
1857                size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1858                smmu->va_size = arm_smmu_id_size_to_bits(size);
1859                if (id & ID2_PTFS_4K)
1860                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1861                if (id & ID2_PTFS_16K)
1862                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1863                if (id & ID2_PTFS_64K)
1864                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1865        }
1866
1867        /* Now we've corralled the various formats, what'll it do? */
1868        if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1869                smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1870        if (smmu->features &
1871            (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1872                smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1873        if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1874                smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1875        if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1876                smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1877
1878        if (arm_smmu_ops.pgsize_bitmap == -1UL)
1879                arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1880        else
1881                arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1882        dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1883                   smmu->pgsize_bitmap);
1884
1886        if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1887                dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1888                           smmu->va_size, smmu->ipa_size);
1889
1890        if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1891                dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1892                           smmu->ipa_size, smmu->pa_size);
1893
1894        return 0;
1895}
1896
1897struct arm_smmu_match_data {
1898        enum arm_smmu_arch_version version;
1899        enum arm_smmu_implementation model;
1900};
1901
1902#define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1903static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1904
1905ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1906ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1907ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1908ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1909ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1910
1911static const struct of_device_id arm_smmu_of_match[] = {
1912        { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1913        { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1914        { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1915        { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1916        { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1917        { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1918        { },
1919};
1920MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1921
1922#ifdef CONFIG_ACPI
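    /* Map an IORT SMMU model onto the driver's version/implementation enums */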
1923static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1924{
1925        int ret = 0;
1926
1927        switch (model) {
1928        case ACPI_IORT_SMMU_V1:
1929        case ACPI_IORT_SMMU_CORELINK_MMU400:
1930                smmu->version = ARM_SMMU_V1;
1931                smmu->model = GENERIC_SMMU;
1932                break;
1933        case ACPI_IORT_SMMU_CORELINK_MMU401:
1934                smmu->version = ARM_SMMU_V1_64K;
1935                smmu->model = GENERIC_SMMU;
1936                break;
1937        case ACPI_IORT_SMMU_V2:
1938                smmu->version = ARM_SMMU_V2;
1939                smmu->model = GENERIC_SMMU;
1940                break;
1941        case ACPI_IORT_SMMU_CORELINK_MMU500:
1942                smmu->version = ARM_SMMU_V2;
1943                smmu->model = ARM_MMU500;
1944                break;
1945        case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1946                smmu->version = ARM_SMMU_V2;
1947                smmu->model = CAVIUM_SMMUV2;
1948                break;
1949        default:
1950                ret = -ENODEV;
1951        }
1952
1953        return ret;
1954}
1955
1956static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1957                                      struct arm_smmu_device *smmu)
1958{
1959        struct device *dev = smmu->dev;
1960        struct acpi_iort_node *node =
1961                *(struct acpi_iort_node **)dev_get_platdata(dev);
1962        struct acpi_iort_smmu *iort_smmu;
1963        int ret;
1964
1965        /* Retrieve SMMU1/2 specific data */
1966        iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1967
1968        ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1969        if (ret < 0)
1970                return ret;
1971
1972        /* Ignore the configuration access interrupt */
1973        smmu->num_global_irqs = 1;
1974
1975        if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1976                smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1977
1978        return 0;
1979}
1980#else
1981static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1982                                             struct arm_smmu_device *smmu)
1983{
1984        return -ENODEV;
1985}
1986#endif
1987
1988static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1989                                    struct arm_smmu_device *smmu)
1990{
1991        const struct arm_smmu_match_data *data;
1992        struct device *dev = &pdev->dev;
1993        bool legacy_binding;
1994
1995        if (of_property_read_u32(dev->of_node, "#global-interrupts",
1996                                 &smmu->num_global_irqs)) {
1997                dev_err(dev, "missing #global-interrupts property\n");
1998                return -ENODEV;
1999        }
2000
2001        data = of_device_get_match_data(dev);
2002        smmu->version = data->version;
2003        smmu->model = data->model;
2004
2005        parse_driver_options(smmu);
2006
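            /*
             * The legacy "mmu-masters" binding and the generic "iommus"
             * binding are mutually exclusive system-wide: once one style has
             * been seen, refuse to probe SMMUs described with the other.
             */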
2007        legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2008        if (legacy_binding && !using_generic_binding) {
2009                if (!using_legacy_binding)
2010                        pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2011                using_legacy_binding = true;
2012        } else if (!legacy_binding && !using_legacy_binding) {
2013                using_generic_binding = true;
2014        } else {
2015                dev_err(dev, "not probing due to mismatched DT properties\n");
2016                return -ENODEV;
2017        }
2018
2019        if (of_dma_is_coherent(dev->of_node))
2020                smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2021
2022        return 0;
2023}
2024
2025static void arm_smmu_bus_init(void)
2026{
2027        /* Oh, for a proper bus abstraction */
2028        if (!iommu_present(&platform_bus_type))
2029                bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2030#ifdef CONFIG_ARM_AMBA
2031        if (!iommu_present(&amba_bustype))
2032                bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2033#endif
2034#ifdef CONFIG_PCI
2035        if (!iommu_present(&pci_bus_type)) {
2036                pci_request_acs();
2037                bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2038        }
2039#endif
2040}
2041
2042static int arm_smmu_device_probe(struct platform_device *pdev)
2043{
2044        struct resource *res;
2045        resource_size_t ioaddr;
2046        struct arm_smmu_device *smmu;
2047        struct device *dev = &pdev->dev;
2048        int num_irqs, i, err;
2049
2050        smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2051        if (!smmu) {
2052                dev_err(dev, "failed to allocate arm_smmu_device\n");
2053                return -ENOMEM;
2054        }
2055        smmu->dev = dev;
2056
2057        if (dev->of_node)
2058                err = arm_smmu_device_dt_probe(pdev, smmu);
2059        else
2060                err = arm_smmu_device_acpi_probe(pdev, smmu);
2061
2062        if (err)
2063                return err;
2064
2065        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2066        smmu->base = devm_ioremap_resource(dev, res);
2067        if (IS_ERR(smmu->base))
2068                return PTR_ERR(smmu->base);
2069        ioaddr = res->start;
2070        smmu->cb_base = smmu->base + resource_size(res) / 2;
2071
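            /*
             * Count all IRQ resources: the first num_global_irqs of them are
             * global fault interrupts, the remainder are context interrupts.
             */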
2072        num_irqs = 0;
2073        while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2074                num_irqs++;
2075                if (num_irqs > smmu->num_global_irqs)
2076                        smmu->num_context_irqs++;
2077        }
2078
2079        if (!smmu->num_context_irqs) {
2080                dev_err(dev, "found %d interrupts but expected at least %d\n",
2081                        num_irqs, smmu->num_global_irqs + 1);
2082                return -ENODEV;
2083        }
2084
2085        smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2086                                  GFP_KERNEL);
2087        if (!smmu->irqs) {
2088                dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2089                return -ENOMEM;
2090        }
2091
2092        for (i = 0; i < num_irqs; ++i) {
2093                int irq = platform_get_irq(pdev, i);
2094
2095                if (irq < 0) {
2096                        dev_err(dev, "failed to get irq index %d\n", i);
2097                        return -ENODEV;
2098                }
2099                smmu->irqs[i] = irq;
2100        }
2101
2102        err = arm_smmu_device_cfg_probe(smmu);
2103        if (err)
2104                return err;
2105
2106        if (smmu->version == ARM_SMMU_V2 &&
2107            smmu->num_context_banks != smmu->num_context_irqs) {
2108                dev_err(dev,
2109                        "found only %d context interrupt(s) but %d required\n",
2110                        smmu->num_context_irqs, smmu->num_context_banks);
2111                return -ENODEV;
2112        }
2113
2114        for (i = 0; i < smmu->num_global_irqs; ++i) {
2115                err = devm_request_irq(smmu->dev, smmu->irqs[i],
2116                                       arm_smmu_global_fault,
2117                                       IRQF_SHARED,
2118                                       "arm-smmu global fault",
2119                                       smmu);
2120                if (err) {
2121                        dev_err(dev, "failed to request global IRQ %d (%u)\n",
2122                                i, smmu->irqs[i]);
2123                        return err;
2124                }
2125        }
2126
2127        err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2128                                     "smmu.%pa", &ioaddr);
2129        if (err) {
2130                dev_err(dev, "Failed to register iommu in sysfs\n");
2131                return err;
2132        }
2133
2134        iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2135        iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2136
2137        err = iommu_device_register(&smmu->iommu);
2138        if (err) {
2139                dev_err(dev, "Failed to register iommu\n");
2140                return err;
2141        }
2142
2143        platform_set_drvdata(pdev, smmu);
2144        arm_smmu_device_reset(smmu);
2145        arm_smmu_test_smr_masks(smmu);
2146
2147        /*
2148         * For ACPI and generic DT bindings, an SMMU will be probed before
2149         * any device which might need it, so we want the bus ops in place
2150         * ready to handle default domain setup as soon as any SMMU exists.
2151         */
2152        if (!using_legacy_binding)
2153                arm_smmu_bus_init();
2154
2155        return 0;
2156}
2157
2158/*
2159 * With the legacy DT binding in play, though, we have no guarantees about
2160 * probe order, but then we're also not doing default domains, so we can
2161 * delay setting bus ops until we're sure every possible SMMU is ready,
2162 * and that way ensure that no add_device() calls get missed.
2163 */
2164static int arm_smmu_legacy_bus_init(void)
2165{
2166        if (using_legacy_binding)
2167                arm_smmu_bus_init();
2168        return 0;
2169}
2170device_initcall_sync(arm_smmu_legacy_bus_init);
2171
2172static int arm_smmu_device_remove(struct platform_device *pdev)
2173{
2174        struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2175
2176        if (!smmu)
2177                return -ENODEV;
2178
2179        if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2180                dev_err(&pdev->dev, "removing device with active domains!\n");
2181
2182        /* Turn the thing off */
2183        writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2184        return 0;
2185}
2186
2187static void arm_smmu_device_shutdown(struct platform_device *pdev)
2188{
2189        arm_smmu_device_remove(pdev);
2190}
2191
2192static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2193{
2194        struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2195
2196        arm_smmu_device_reset(smmu);
2197        return 0;
2198}
2199
2200static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2201
2202static struct platform_driver arm_smmu_driver = {
2203        .driver = {
2204                .name           = "arm-smmu",
2205                .of_match_table = of_match_ptr(arm_smmu_of_match),
2206                .pm             = &arm_smmu_pm_ops,
2207        },
2208        .probe  = arm_smmu_device_probe,
2209        .remove = arm_smmu_device_remove,
2210        .shutdown = arm_smmu_device_shutdown,
2211};
2212module_platform_driver(arm_smmu_driver);
2213
2214IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1");
2215IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2");
2216IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400");
2217IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401");
2218IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500");
2219IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2");
2220
2221MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2222MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2223MODULE_LICENSE("GPL v2");
2224