linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * IOMMU API for ARM architected SMMUv3 implementations.
   4 *
   5 * Copyright (C) 2015 ARM Limited
   6 *
   7 * Author: Will Deacon <will.deacon@arm.com>
   8 *
   9 * This driver is powered by bad coffee and bombay mix.
  10 */
  11
  12#include <linux/acpi.h>
  13#include <linux/acpi_iort.h>
  14#include <linux/bitops.h>
  15#include <linux/crash_dump.h>
  16#include <linux/delay.h>
  17#include <linux/dma-iommu.h>
  18#include <linux/err.h>
  19#include <linux/interrupt.h>
  20#include <linux/io-pgtable.h>
  21#include <linux/iopoll.h>
  22#include <linux/module.h>
  23#include <linux/msi.h>
  24#include <linux/of.h>
  25#include <linux/of_address.h>
  26#include <linux/of_iommu.h>
  27#include <linux/of_platform.h>
  28#include <linux/pci.h>
  29#include <linux/pci-ats.h>
  30#include <linux/platform_device.h>
  31
  32#include <linux/amba/bus.h>
  33
  34#include "arm-smmu-v3.h"
  35
   36static bool disable_bypass = true;
  37module_param(disable_bypass, bool, 0444);
  38MODULE_PARM_DESC(disable_bypass,
  39        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
  40
  41static bool disable_msipolling;
  42module_param(disable_msipolling, bool, 0444);
  43MODULE_PARM_DESC(disable_msipolling,
  44        "Disable MSI-based polling for CMD_SYNC completion.");
  45
  46enum arm_smmu_msi_index {
  47        EVTQ_MSI_INDEX,
  48        GERROR_MSI_INDEX,
  49        PRIQ_MSI_INDEX,
  50        ARM_SMMU_MAX_MSIS,
  51};
  52
  53static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
  54        [EVTQ_MSI_INDEX] = {
  55                ARM_SMMU_EVTQ_IRQ_CFG0,
  56                ARM_SMMU_EVTQ_IRQ_CFG1,
  57                ARM_SMMU_EVTQ_IRQ_CFG2,
  58        },
  59        [GERROR_MSI_INDEX] = {
  60                ARM_SMMU_GERROR_IRQ_CFG0,
  61                ARM_SMMU_GERROR_IRQ_CFG1,
  62                ARM_SMMU_GERROR_IRQ_CFG2,
  63        },
  64        [PRIQ_MSI_INDEX] = {
  65                ARM_SMMU_PRIQ_IRQ_CFG0,
  66                ARM_SMMU_PRIQ_IRQ_CFG1,
  67                ARM_SMMU_PRIQ_IRQ_CFG2,
  68        },
  69};
  70
  71struct arm_smmu_option_prop {
  72        u32 opt;
  73        const char *prop;
  74};
  75
  76DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
  77DEFINE_MUTEX(arm_smmu_asid_lock);
  78
  79static struct arm_smmu_option_prop arm_smmu_options[] = {
  80        { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
  81        { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
  82        { 0, NULL},
  83};
  84
  85static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
  86                                                 struct arm_smmu_device *smmu)
  87{
  88        if (offset > SZ_64K)
  89                return smmu->page1 + offset - SZ_64K;
  90
  91        return smmu->base + offset;
  92}
  93
  94static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
  95{
  96        return container_of(dom, struct arm_smmu_domain, domain);
  97}
  98
  99static void parse_driver_options(struct arm_smmu_device *smmu)
 100{
 101        int i = 0;
 102
 103        do {
 104                if (of_property_read_bool(smmu->dev->of_node,
 105                                                arm_smmu_options[i].prop)) {
 106                        smmu->options |= arm_smmu_options[i].opt;
 107                        dev_notice(smmu->dev, "option %s\n",
 108                                arm_smmu_options[i].prop);
 109                }
 110        } while (arm_smmu_options[++i].opt);
 111}
 112
 113/* Low-level queue manipulation functions */
 114static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
 115{
 116        u32 space, prod, cons;
 117
 118        prod = Q_IDX(q, q->prod);
 119        cons = Q_IDX(q, q->cons);
 120
 121        if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
 122                space = (1 << q->max_n_shift) - (prod - cons);
 123        else
 124                space = cons - prod;
 125
 126        return space >= n;
 127}
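/*
 * Illustrative example of the prod/cons encoding used above and throughout
 * this file (numbers chosen for illustration only): with max_n_shift == 3
 * the queue has eight slots, Q_IDX() is the low three bits of prod/cons and
 * Q_WRP() is the wrap bit just above them. If both pointers carry the same
 * wrap bit, with Q_IDX(prod) == 5 and Q_IDX(cons) == 3, two slots are in use
 * and queue_has_space() computes 8 - (5 - 3) == 6 free entries. If prod has
 * wrapped once more than cons, with Q_IDX(prod) == 1 and Q_IDX(cons) == 3,
 * six slots are in use and only 3 - 1 == 2 entries remain.
 */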
 128
 129static bool queue_full(struct arm_smmu_ll_queue *q)
 130{
 131        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 132               Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
 133}
 134
 135static bool queue_empty(struct arm_smmu_ll_queue *q)
 136{
 137        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 138               Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
 139}
 140
 141static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
 142{
 143        return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
 144                (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
 145               ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
 146                (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
 147}
 148
 149static void queue_sync_cons_out(struct arm_smmu_queue *q)
 150{
 151        /*
 152         * Ensure that all CPU accesses (reads and writes) to the queue
 153         * are complete before we update the cons pointer.
 154         */
 155        __iomb();
 156        writel_relaxed(q->llq.cons, q->cons_reg);
 157}
 158
 159static void queue_inc_cons(struct arm_smmu_ll_queue *q)
 160{
 161        u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
 162        q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
 163}
 164
 165static int queue_sync_prod_in(struct arm_smmu_queue *q)
 166{
 167        u32 prod;
 168        int ret = 0;
 169
 170        /*
 171         * We can't use the _relaxed() variant here, as we must prevent
 172         * speculative reads of the queue before we have determined that
 173         * prod has indeed moved.
 174         */
 175        prod = readl(q->prod_reg);
 176
 177        if (Q_OVF(prod) != Q_OVF(q->llq.prod))
 178                ret = -EOVERFLOW;
 179
 180        q->llq.prod = prod;
 181        return ret;
 182}
 183
 184static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
 185{
 186        u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
 187        return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
 188}
 189
 190static void queue_poll_init(struct arm_smmu_device *smmu,
 191                            struct arm_smmu_queue_poll *qp)
 192{
 193        qp->delay = 1;
 194        qp->spin_cnt = 0;
 195        qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
 196        qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 197}
 198
 199static int queue_poll(struct arm_smmu_queue_poll *qp)
 200{
 201        if (ktime_compare(ktime_get(), qp->timeout) > 0)
 202                return -ETIMEDOUT;
 203
 204        if (qp->wfe) {
 205                wfe();
 206        } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
 207                cpu_relax();
 208        } else {
 209                udelay(qp->delay);
 210                qp->delay *= 2;
 211                qp->spin_cnt = 0;
 212        }
 213
 214        return 0;
 215}
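/*
 * queue_poll() provides the wait strategy for the polling loops below: if
 * the SMMU can send an event on queue updates (ARM_SMMU_FEAT_SEV) we wfe(),
 * otherwise we busy-spin up to ARM_SMMU_POLL_SPIN_COUNT times and then fall
 * back to an exponentially growing udelay(), giving up with -ETIMEDOUT once
 * ARM_SMMU_POLL_TIMEOUT_US has elapsed since queue_poll_init().
 */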
 216
 217static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
 218{
 219        int i;
 220
 221        for (i = 0; i < n_dwords; ++i)
 222                *dst++ = cpu_to_le64(*src++);
 223}
 224
 225static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
 226{
 227        int i;
 228
 229        for (i = 0; i < n_dwords; ++i)
 230                *dst++ = le64_to_cpu(*src++);
 231}
 232
 233static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 234{
 235        if (queue_empty(&q->llq))
 236                return -EAGAIN;
 237
 238        queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
 239        queue_inc_cons(&q->llq);
 240        queue_sync_cons_out(q);
 241        return 0;
 242}
 243
 244/* High-level queue accessors */
 245static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 246{
 247        memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
 248        cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
 249
 250        switch (ent->opcode) {
 251        case CMDQ_OP_TLBI_EL2_ALL:
 252        case CMDQ_OP_TLBI_NSNH_ALL:
 253                break;
 254        case CMDQ_OP_PREFETCH_CFG:
 255                cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
 256                cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
 257                cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
 258                break;
 259        case CMDQ_OP_CFGI_CD:
 260                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
 261                fallthrough;
 262        case CMDQ_OP_CFGI_STE:
 263                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
 264                cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
 265                break;
 266        case CMDQ_OP_CFGI_CD_ALL:
 267                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
 268                break;
 269        case CMDQ_OP_CFGI_ALL:
 270                /* Cover the entire SID range */
 271                cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
 272                break;
 273        case CMDQ_OP_TLBI_NH_VA:
 274                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
 275                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
 276                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 277                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 278                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 279                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
 280                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
 281                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
 282                break;
 283        case CMDQ_OP_TLBI_S2_IPA:
 284                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
 285                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
 286                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 287                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 288                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
 289                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
 290                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
 291                break;
 292        case CMDQ_OP_TLBI_NH_ASID:
 293                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 294                fallthrough;
 295        case CMDQ_OP_TLBI_S12_VMALL:
 296                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 297                break;
 298        case CMDQ_OP_ATC_INV:
 299                cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
 300                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
 301                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
 302                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
 303                cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
 304                cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
 305                break;
 306        case CMDQ_OP_PRI_RESP:
 307                cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
 308                cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
 309                cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
 310                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
 311                switch (ent->pri.resp) {
 312                case PRI_RESP_DENY:
 313                case PRI_RESP_FAIL:
 314                case PRI_RESP_SUCC:
 315                        break;
 316                default:
 317                        return -EINVAL;
 318                }
 319                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
 320                break;
 321        case CMDQ_OP_CMD_SYNC:
 322                if (ent->sync.msiaddr) {
 323                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
 324                        cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 325                } else {
 326                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
 327                }
 328                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
 329                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
 330                break;
 331        default:
 332                return -ENOENT;
 333        }
 334
 335        return 0;
 336}
 337
 338static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 339                                         u32 prod)
 340{
 341        struct arm_smmu_queue *q = &smmu->cmdq.q;
 342        struct arm_smmu_cmdq_ent ent = {
 343                .opcode = CMDQ_OP_CMD_SYNC,
 344        };
 345
 346        /*
 347         * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
 348         * payload, so the write will zero the entire command on that platform.
 349         */
 350        if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
 351                ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
 352                                   q->ent_dwords * 8;
 353        }
 354
 355        arm_smmu_cmdq_build_cmd(cmd, &ent);
 356}
 357
 358static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 359{
 360        static const char *cerror_str[] = {
 361                [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
 362                [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
 363                [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
 364                [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
 365        };
 366
 367        int i;
 368        u64 cmd[CMDQ_ENT_DWORDS];
 369        struct arm_smmu_queue *q = &smmu->cmdq.q;
 370        u32 cons = readl_relaxed(q->cons_reg);
 371        u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
 372        struct arm_smmu_cmdq_ent cmd_sync = {
 373                .opcode = CMDQ_OP_CMD_SYNC,
 374        };
 375
 376        dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
 377                idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
 378
 379        switch (idx) {
 380        case CMDQ_ERR_CERROR_ABT_IDX:
  381                dev_err(smmu->dev, "retrying command fetch\n");
                fallthrough;
 382        case CMDQ_ERR_CERROR_NONE_IDX:
 383                return;
 384        case CMDQ_ERR_CERROR_ATC_INV_IDX:
 385                /*
 386                 * ATC Invalidation Completion timeout. CONS is still pointing
 387                 * at the CMD_SYNC. Attempt to complete other pending commands
 388                 * by repeating the CMD_SYNC, though we might well end up back
 389                 * here since the ATC invalidation may still be pending.
 390                 */
 391                return;
 392        case CMDQ_ERR_CERROR_ILL_IDX:
 393        default:
 394                break;
 395        }
 396
 397        /*
 398         * We may have concurrent producers, so we need to be careful
 399         * not to touch any of the shadow cmdq state.
 400         */
 401        queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
 402        dev_err(smmu->dev, "skipping command in error state:\n");
 403        for (i = 0; i < ARRAY_SIZE(cmd); ++i)
 404                dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
 405
 406        /* Convert the erroneous command into a CMD_SYNC */
 407        if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
 408                dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
 409                return;
 410        }
 411
 412        queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 413}
 414
 415/*
 416 * Command queue locking.
 417 * This is a form of bastardised rwlock with the following major changes:
 418 *
 419 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 420 *   Neither have barrier semantics, and instead provide only a control
 421 *   dependency.
 422 *
 423 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 424 *   fails if the caller appears to be the last lock holder (yes, this is
 425 *   racy). All successful UNLOCK routines have RELEASE semantics.
 426 */
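/*
 * Summary of the lock word states used by the routines below: 0 means
 * unlocked, a positive value N means N shared holders, and INT_MIN
 * (possibly bumped by optimistic increments from would-be shared lockers,
 * but still negative) means the lock is held exclusively. A shared locker
 * that observes a negative value spins until the count turns non-negative
 * before retrying its increment.
 */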
 427static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
 428{
 429        int val;
 430
 431        /*
 432         * We can try to avoid the cmpxchg() loop by simply incrementing the
 433         * lock counter. When held in exclusive state, the lock counter is set
 434         * to INT_MIN so these increments won't hurt as the value will remain
 435         * negative.
 436         */
 437        if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
 438                return;
 439
 440        do {
 441                val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
 442        } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
 443}
 444
 445static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
 446{
 447        (void)atomic_dec_return_release(&cmdq->lock);
 448}
 449
 450static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
 451{
 452        if (atomic_read(&cmdq->lock) == 1)
 453                return false;
 454
 455        arm_smmu_cmdq_shared_unlock(cmdq);
 456        return true;
 457}
 458
 459#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)            \
 460({                                                                      \
 461        bool __ret;                                                     \
 462        local_irq_save(flags);                                          \
 463        __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
 464        if (!__ret)                                                     \
 465                local_irq_restore(flags);                               \
 466        __ret;                                                          \
 467})
 468
 469#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)          \
 470({                                                                      \
 471        atomic_set_release(&cmdq->lock, 0);                             \
 472        local_irq_restore(flags);                                       \
 473})
 474
 475
 476/*
 477 * Command queue insertion.
 478 * This is made fiddly by our attempts to achieve some sort of scalability
 479 * since there is one queue shared amongst all of the CPUs in the system.  If
 480 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 481 * then you'll *love* this monstrosity.
 482 *
 483 * The basic idea is to split the queue up into ranges of commands that are
 484 * owned by a given CPU; the owner may not have written all of the commands
 485 * itself, but is responsible for advancing the hardware prod pointer when
 486 * the time comes. The algorithm is roughly:
 487 *
 488 *      1. Allocate some space in the queue. At this point we also discover
 489 *         whether the head of the queue is currently owned by another CPU,
 490 *         or whether we are the owner.
 491 *
 492 *      2. Write our commands into our allocated slots in the queue.
 493 *
 494 *      3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 495 *
 496 *      4. If we are an owner:
 497 *              a. Wait for the previous owner to finish.
 498 *              b. Mark the queue head as unowned, which tells us the range
 499 *                 that we are responsible for publishing.
 500 *              c. Wait for all commands in our owned range to become valid.
 501 *              d. Advance the hardware prod pointer.
 502 *              e. Tell the next owner we've finished.
 503 *
 504 *      5. If we are inserting a CMD_SYNC (we may or may not have been an
 505 *         owner), then we need to stick around until it has completed:
 506 *              a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 507 *                 to clear the first 4 bytes.
 508 *              b. Otherwise, we spin waiting for the hardware cons pointer to
 509 *                 advance past our command.
 510 *
 511 * The devil is in the details, particularly the use of locking for handling
 512 * SYNC completion and freeing up space in the queue before we think that it is
 513 * full.
 514 */
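/*
 * Ownership in step 1 is tracked with CMDQ_PROD_OWNED_FLAG in the shadow
 * prod value: every CPU sets the flag when it allocates space, so the CPU
 * whose cmpxchg() replaced a value with the flag clear knows it is the new
 * owner, and the owner clears it again when it stops gathering work in
 * step 4b. For example, if CPUs A and B allocate back-to-back on an idle
 * queue, A observes the flag clear and becomes owner, while B merely
 * publishes its commands and, unless it queued a CMD_SYNC, returns without
 * touching the hardware prod register.
 */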
 515static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
 516                                               u32 sprod, u32 eprod, bool set)
 517{
 518        u32 swidx, sbidx, ewidx, ebidx;
 519        struct arm_smmu_ll_queue llq = {
 520                .max_n_shift    = cmdq->q.llq.max_n_shift,
 521                .prod           = sprod,
 522        };
 523
 524        ewidx = BIT_WORD(Q_IDX(&llq, eprod));
 525        ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
 526
 527        while (llq.prod != eprod) {
 528                unsigned long mask;
 529                atomic_long_t *ptr;
 530                u32 limit = BITS_PER_LONG;
 531
 532                swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
 533                sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
 534
 535                ptr = &cmdq->valid_map[swidx];
 536
 537                if ((swidx == ewidx) && (sbidx < ebidx))
 538                        limit = ebidx;
 539
 540                mask = GENMASK(limit - 1, sbidx);
 541
 542                /*
 543                 * The valid bit is the inverse of the wrap bit. This means
 544                 * that a zero-initialised queue is invalid and, after marking
 545                 * all entries as valid, they become invalid again when we
 546                 * wrap.
 547                 */
 548                if (set) {
 549                        atomic_long_xor(mask, ptr);
 550                } else { /* Poll */
 551                        unsigned long valid;
 552
 553                        valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
 554                        atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
 555                }
 556
 557                llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
 558        }
 559}
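/*
 * Example of the wrap/valid trick above (illustrative): on a pass through
 * the queue with the wrap bit clear, producers XOR their bits in valid_map
 * from 0 to 1 and pollers wait for 1s; on the following pass the wrap bit
 * is set, the same XOR flips those bits back to 0 and pollers wait for 0s.
 * The map therefore never needs to be explicitly cleared between laps.
 */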
 560
 561/* Mark all entries in the range [sprod, eprod) as valid */
 562static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
 563                                        u32 sprod, u32 eprod)
 564{
 565        __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
 566}
 567
 568/* Wait for all entries in the range [sprod, eprod) to become valid */
 569static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
 570                                         u32 sprod, u32 eprod)
 571{
 572        __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
 573}
 574
 575/* Wait for the command queue to become non-full */
 576static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
 577                                             struct arm_smmu_ll_queue *llq)
 578{
 579        unsigned long flags;
 580        struct arm_smmu_queue_poll qp;
 581        struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
 582        int ret = 0;
 583
 584        /*
 585         * Try to update our copy of cons by grabbing exclusive cmdq access. If
 586         * that fails, spin until somebody else updates it for us.
 587         */
 588        if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 589                WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 590                arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 591                llq->val = READ_ONCE(cmdq->q.llq.val);
 592                return 0;
 593        }
 594
 595        queue_poll_init(smmu, &qp);
 596        do {
 597                llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
 598                if (!queue_full(llq))
 599                        break;
 600
 601                ret = queue_poll(&qp);
 602        } while (!ret);
 603
 604        return ret;
 605}
 606
 607/*
 608 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 609 * Must be called with the cmdq lock held in some capacity.
 610 */
 611static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
 612                                          struct arm_smmu_ll_queue *llq)
 613{
 614        int ret = 0;
 615        struct arm_smmu_queue_poll qp;
 616        struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
 617        u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
 618
 619        queue_poll_init(smmu, &qp);
 620
 621        /*
 622         * The MSI won't generate an event, since it's being written back
 623         * into the command queue.
 624         */
 625        qp.wfe = false;
 626        smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
 627        llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
 628        return ret;
 629}
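/*
 * The MSI-based wait above relies on arm_smmu_cmdq_build_sync_cmd() having
 * pointed the CMD_SYNC's MSI address back at the command's own slot in the
 * queue: the MSI data is left as zero, so completion is signalled by the
 * SMMU overwriting the first 32 bits of the CMD_SYNC with zero, which is
 * exactly what smp_cond_load_relaxed() is waiting for.
 */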
 630
 631/*
 632 * Wait until the SMMU cons index passes llq->prod.
 633 * Must be called with the cmdq lock held in some capacity.
 634 */
 635static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 636                                               struct arm_smmu_ll_queue *llq)
 637{
 638        struct arm_smmu_queue_poll qp;
 639        struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
 640        u32 prod = llq->prod;
 641        int ret = 0;
 642
 643        queue_poll_init(smmu, &qp);
 644        llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
 645        do {
 646                if (queue_consumed(llq, prod))
 647                        break;
 648
 649                ret = queue_poll(&qp);
 650
 651                /*
 652                 * This needs to be a readl() so that our subsequent call
 653                 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
 654                 *
 655                 * Specifically, we need to ensure that we observe all
 656                 * shared_lock()s by other CMD_SYNCs that share our owner,
 657                 * so that a failing call to tryunlock() means that we're
 658                 * the last one out and therefore we can safely advance
 659                 * cmdq->q.llq.cons. Roughly speaking:
 660                 *
 661                 * CPU 0                CPU1                    CPU2 (us)
 662                 *
 663                 * if (sync)
 664                 *      shared_lock();
 665                 *
 666                 * dma_wmb();
 667                 * set_valid_map();
 668                 *
 669                 *                      if (owner) {
 670                 *                              poll_valid_map();
 671                 *                              <control dependency>
 672                 *                              writel(prod_reg);
 673                 *
 674                 *                                              readl(cons_reg);
 675                 *                                              tryunlock();
 676                 *
 677                 * Requires us to see CPU 0's shared_lock() acquisition.
 678                 */
 679                llq->cons = readl(cmdq->q.cons_reg);
 680        } while (!ret);
 681
 682        return ret;
 683}
 684
 685static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 686                                         struct arm_smmu_ll_queue *llq)
 687{
 688        if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
 689                return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
 690
 691        return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
 692}
 693
 694static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
 695                                        u32 prod, int n)
 696{
 697        int i;
 698        struct arm_smmu_ll_queue llq = {
 699                .max_n_shift    = cmdq->q.llq.max_n_shift,
 700                .prod           = prod,
 701        };
 702
 703        for (i = 0; i < n; ++i) {
 704                u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
 705
 706                prod = queue_inc_prod_n(&llq, i);
 707                queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
 708        }
 709}
 710
 711/*
 712 * This is the actual insertion function, and provides the following
 713 * ordering guarantees to callers:
 714 *
 715 * - There is a dma_wmb() before publishing any commands to the queue.
 716 *   This can be relied upon to order prior writes to data structures
 717 *   in memory (such as a CD or an STE) before the command.
 718 *
 719 * - On completion of a CMD_SYNC, there is a control dependency.
 720 *   This can be relied upon to order subsequent writes to memory (e.g.
 721 *   freeing an IOVA) after completion of the CMD_SYNC.
 722 *
 723 * - Command insertion is totally ordered, so if two CPUs each race to
 724 *   insert their own list of commands then all of the commands from one
 725 *   CPU will appear before any of the commands from the other CPU.
 726 */
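/*
 * For example, arm_smmu_write_ctx_desc() and arm_smmu_write_strtab_ent()
 * below update a CD or STE in memory and then push CFGI commands plus a
 * CMD_SYNC through this path: the dma_wmb() guarantees that the SMMU cannot
 * observe the invalidation command before the updated structure, and the
 * CMD_SYNC completion guarantees the stale cached entry is gone before the
 * caller carries on.
 */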
 727static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 728                                       u64 *cmds, int n, bool sync)
 729{
 730        u64 cmd_sync[CMDQ_ENT_DWORDS];
 731        u32 prod;
 732        unsigned long flags;
 733        bool owner;
 734        struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
 735        struct arm_smmu_ll_queue llq = {
 736                .max_n_shift = cmdq->q.llq.max_n_shift,
 737        }, head = llq;
 738        int ret = 0;
 739
 740        /* 1. Allocate some space in the queue */
 741        local_irq_save(flags);
 742        llq.val = READ_ONCE(cmdq->q.llq.val);
 743        do {
 744                u64 old;
 745
 746                while (!queue_has_space(&llq, n + sync)) {
 747                        local_irq_restore(flags);
 748                        if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
 749                                dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
 750                        local_irq_save(flags);
 751                }
 752
 753                head.cons = llq.cons;
 754                head.prod = queue_inc_prod_n(&llq, n + sync) |
 755                                             CMDQ_PROD_OWNED_FLAG;
 756
 757                old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
 758                if (old == llq.val)
 759                        break;
 760
 761                llq.val = old;
 762        } while (1);
 763        owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
 764        head.prod &= ~CMDQ_PROD_OWNED_FLAG;
 765        llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
 766
 767        /*
 768         * 2. Write our commands into the queue
 769         * Dependency ordering from the cmpxchg() loop above.
 770         */
 771        arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
 772        if (sync) {
 773                prod = queue_inc_prod_n(&llq, n);
 774                arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
 775                queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
 776
 777                /*
 778                 * In order to determine completion of our CMD_SYNC, we must
 779                 * ensure that the queue can't wrap twice without us noticing.
 780                 * We achieve that by taking the cmdq lock as shared before
 781                 * marking our slot as valid.
 782                 */
 783                arm_smmu_cmdq_shared_lock(cmdq);
 784        }
 785
 786        /* 3. Mark our slots as valid, ensuring commands are visible first */
 787        dma_wmb();
 788        arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
 789
 790        /* 4. If we are the owner, take control of the SMMU hardware */
 791        if (owner) {
 792                /* a. Wait for previous owner to finish */
 793                atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
 794
 795                /* b. Stop gathering work by clearing the owned flag */
 796                prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
 797                                                   &cmdq->q.llq.atomic.prod);
 798                prod &= ~CMDQ_PROD_OWNED_FLAG;
 799
 800                /*
 801                 * c. Wait for any gathered work to be written to the queue.
 802                 * Note that we read our own entries so that we have the control
 803                 * dependency required by (d).
 804                 */
 805                arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
 806
 807                /*
 808                 * d. Advance the hardware prod pointer
 809                 * Control dependency ordering from the entries becoming valid.
 810                 */
 811                writel_relaxed(prod, cmdq->q.prod_reg);
 812
 813                /*
 814                 * e. Tell the next owner we're done
 815                 * Make sure we've updated the hardware first, so that we don't
 816                 * race to update prod and potentially move it backwards.
 817                 */
 818                atomic_set_release(&cmdq->owner_prod, prod);
 819        }
 820
 821        /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
 822        if (sync) {
 823                llq.prod = queue_inc_prod_n(&llq, n);
 824                ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
 825                if (ret) {
 826                        dev_err_ratelimited(smmu->dev,
 827                                            "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
 828                                            llq.prod,
 829                                            readl_relaxed(cmdq->q.prod_reg),
 830                                            readl_relaxed(cmdq->q.cons_reg));
 831                }
 832
 833                /*
 834                 * Try to unlock the cmdq lock. This will fail if we're the last
 835                 * reader, in which case we can safely update cmdq->q.llq.cons
 836                 */
 837                if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 838                        WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 839                        arm_smmu_cmdq_shared_unlock(cmdq);
 840                }
 841        }
 842
 843        local_irq_restore(flags);
 844        return ret;
 845}
 846
 847static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 848                                   struct arm_smmu_cmdq_ent *ent)
 849{
 850        u64 cmd[CMDQ_ENT_DWORDS];
 851
 852        if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
 853                dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
 854                         ent->opcode);
 855                return -EINVAL;
 856        }
 857
 858        return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
 859}
 860
 861static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 862{
 863        return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
 864}
 865
 866static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
 867                                    struct arm_smmu_cmdq_batch *cmds,
 868                                    struct arm_smmu_cmdq_ent *cmd)
 869{
 870        if (cmds->num == CMDQ_BATCH_ENTRIES) {
 871                arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
 872                cmds->num = 0;
 873        }
 874        arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
 875        cmds->num++;
 876}
 877
 878static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
 879                                      struct arm_smmu_cmdq_batch *cmds)
 880{
 881        return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
 882}
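/*
 * The batch helpers above let callers such as arm_smmu_sync_cd() accumulate
 * up to CMDQ_BATCH_ENTRIES commands and hand them to
 * arm_smmu_cmdq_issue_cmdlist() in one go, with the final submit also
 * requesting a CMD_SYNC. This amortises the cost of allocating queue space
 * and waiting for completion across the whole batch.
 */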
 883
 884/* Context descriptor manipulation functions */
 885void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 886{
 887        struct arm_smmu_cmdq_ent cmd = {
 888                .opcode = CMDQ_OP_TLBI_NH_ASID,
 889                .tlbi.asid = asid,
 890        };
 891
 892        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 893        arm_smmu_cmdq_issue_sync(smmu);
 894}
 895
 896static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 897                             int ssid, bool leaf)
 898{
 899        size_t i;
 900        unsigned long flags;
 901        struct arm_smmu_master *master;
 902        struct arm_smmu_cmdq_batch cmds = {};
 903        struct arm_smmu_device *smmu = smmu_domain->smmu;
 904        struct arm_smmu_cmdq_ent cmd = {
 905                .opcode = CMDQ_OP_CFGI_CD,
 906                .cfgi   = {
 907                        .ssid   = ssid,
 908                        .leaf   = leaf,
 909                },
 910        };
 911
 912        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
 913        list_for_each_entry(master, &smmu_domain->devices, domain_head) {
 914                for (i = 0; i < master->num_sids; i++) {
 915                        cmd.cfgi.sid = master->sids[i];
 916                        arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 917                }
 918        }
 919        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 920
 921        arm_smmu_cmdq_batch_submit(smmu, &cmds);
 922}
 923
 924static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
 925                                        struct arm_smmu_l1_ctx_desc *l1_desc)
 926{
 927        size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
 928
 929        l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
 930                                             &l1_desc->l2ptr_dma, GFP_KERNEL);
 931        if (!l1_desc->l2ptr) {
 932                dev_warn(smmu->dev,
 933                         "failed to allocate context descriptor table\n");
 934                return -ENOMEM;
 935        }
 936        return 0;
 937}
 938
 939static void arm_smmu_write_cd_l1_desc(__le64 *dst,
 940                                      struct arm_smmu_l1_ctx_desc *l1_desc)
 941{
 942        u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
 943                  CTXDESC_L1_DESC_V;
 944
 945        /* See comment in arm_smmu_write_ctx_desc() */
 946        WRITE_ONCE(*dst, cpu_to_le64(val));
 947}
 948
 949static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
 950                                   u32 ssid)
 951{
 952        __le64 *l1ptr;
 953        unsigned int idx;
 954        struct arm_smmu_l1_ctx_desc *l1_desc;
 955        struct arm_smmu_device *smmu = smmu_domain->smmu;
 956        struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
 957
 958        if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
 959                return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
 960
 961        idx = ssid >> CTXDESC_SPLIT;
 962        l1_desc = &cdcfg->l1_desc[idx];
 963        if (!l1_desc->l2ptr) {
 964                if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
 965                        return NULL;
 966
 967                l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
 968                arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
 969                /* An invalid L1CD can be cached */
 970                arm_smmu_sync_cd(smmu_domain, ssid, false);
 971        }
 972        idx = ssid & (CTXDESC_L2_ENTRIES - 1);
 973        return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
 974}
 975
 976int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 977                            struct arm_smmu_ctx_desc *cd)
 978{
 979        /*
 980         * This function handles the following cases:
 981         *
 982         * (1) Install primary CD, for normal DMA traffic (SSID = 0).
 983         * (2) Install a secondary CD, for SID+SSID traffic.
 984         * (3) Update ASID of a CD. Atomically write the first 64 bits of the
 985         *     CD, then invalidate the old entry and mappings.
 986         * (4) Remove a secondary CD.
 987         */
 988        u64 val;
 989        bool cd_live;
 990        __le64 *cdptr;
 991        struct arm_smmu_device *smmu = smmu_domain->smmu;
 992
 993        if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
 994                return -E2BIG;
 995
 996        cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
 997        if (!cdptr)
 998                return -ENOMEM;
 999
1000        val = le64_to_cpu(cdptr[0]);
1001        cd_live = !!(val & CTXDESC_CD_0_V);
1002
1003        if (!cd) { /* (4) */
1004                val = 0;
1005        } else if (cd_live) { /* (3) */
1006                val &= ~CTXDESC_CD_0_ASID;
1007                val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1008                /*
1009                 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1010                 * this substream's traffic
1011                 */
1012        } else { /* (1) and (2) */
1013                cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1014                cdptr[2] = 0;
1015                cdptr[3] = cpu_to_le64(cd->mair);
1016
1017                /*
1018                 * STE is live, and the SMMU might read dwords of this CD in any
1019                 * order. Ensure that it observes valid values before reading
1020                 * V=1.
1021                 */
1022                arm_smmu_sync_cd(smmu_domain, ssid, true);
1023
1024                val = cd->tcr |
1025#ifdef __BIG_ENDIAN
1026                        CTXDESC_CD_0_ENDI |
1027#endif
1028                        CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1029                        (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1030                        CTXDESC_CD_0_AA64 |
1031                        FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1032                        CTXDESC_CD_0_V;
1033
1034                /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1035                if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1036                        val |= CTXDESC_CD_0_S;
1037        }
1038
1039        /*
1040         * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1041         * "Configuration structures and configuration invalidation completion"
1042         *
1043         *   The size of single-copy atomic reads made by the SMMU is
1044         *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1045         *   field within an aligned 64-bit span of a structure can be altered
1046         *   without first making the structure invalid.
1047         */
1048        WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1049        arm_smmu_sync_cd(smmu_domain, ssid, true);
1050        return 0;
1051}
1052
1053static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1054{
1055        int ret;
1056        size_t l1size;
1057        size_t max_contexts;
1058        struct arm_smmu_device *smmu = smmu_domain->smmu;
1059        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1060        struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1061
1062        max_contexts = 1 << cfg->s1cdmax;
1063
1064        if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1065            max_contexts <= CTXDESC_L2_ENTRIES) {
1066                cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1067                cdcfg->num_l1_ents = max_contexts;
1068
1069                l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1070        } else {
1071                cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1072                cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1073                                                  CTXDESC_L2_ENTRIES);
1074
1075                cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1076                                              sizeof(*cdcfg->l1_desc),
1077                                              GFP_KERNEL);
1078                if (!cdcfg->l1_desc)
1079                        return -ENOMEM;
1080
1081                l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1082        }
1083
1084        cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1085                                           GFP_KERNEL);
1086        if (!cdcfg->cdtab) {
1087                dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1088                ret = -ENOMEM;
1089                goto err_free_l1;
1090        }
1091
1092        return 0;
1093
1094err_free_l1:
1095        if (cdcfg->l1_desc) {
1096                devm_kfree(smmu->dev, cdcfg->l1_desc);
1097                cdcfg->l1_desc = NULL;
1098        }
1099        return ret;
1100}
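/*
 * Illustrative sizing for the two formats above: with s1cdmax <= 10 (at
 * most CTXDESC_L2_ENTRIES == 1024 contexts) a single linear table of CDs is
 * used. With, say, s1cdmax == 16 (and ARM_SMMU_FEAT_2_LVL_CDTAB present),
 * the table becomes two-level: 65536 / 1024 == 64 L1 descriptors are
 * allocated here, and each 1024-entry leaf table is only allocated on first
 * use by arm_smmu_get_cd_ptr(), which indexes it with ssid >> CTXDESC_SPLIT
 * and ssid % CTXDESC_L2_ENTRIES.
 */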
1101
1102static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1103{
1104        int i;
1105        size_t size, l1size;
1106        struct arm_smmu_device *smmu = smmu_domain->smmu;
1107        struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1108
1109        if (cdcfg->l1_desc) {
1110                size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1111
1112                for (i = 0; i < cdcfg->num_l1_ents; i++) {
1113                        if (!cdcfg->l1_desc[i].l2ptr)
1114                                continue;
1115
1116                        dmam_free_coherent(smmu->dev, size,
1117                                           cdcfg->l1_desc[i].l2ptr,
1118                                           cdcfg->l1_desc[i].l2ptr_dma);
1119                }
1120                devm_kfree(smmu->dev, cdcfg->l1_desc);
1121                cdcfg->l1_desc = NULL;
1122
1123                l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1124        } else {
1125                l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1126        }
1127
1128        dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1129        cdcfg->cdtab_dma = 0;
1130        cdcfg->cdtab = NULL;
1131}
1132
1133bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1134{
1135        bool free;
1136        struct arm_smmu_ctx_desc *old_cd;
1137
1138        if (!cd->asid)
1139                return false;
1140
1141        free = refcount_dec_and_test(&cd->refs);
1142        if (free) {
1143                old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1144                WARN_ON(old_cd != cd);
1145        }
1146        return free;
1147}
1148
1149/* Stream table manipulation functions */
1150static void
1151arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1152{
1153        u64 val = 0;
1154
1155        val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1156        val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1157
1158        /* See comment in arm_smmu_write_ctx_desc() */
1159        WRITE_ONCE(*dst, cpu_to_le64(val));
1160}
1161
1162static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1163{
1164        struct arm_smmu_cmdq_ent cmd = {
1165                .opcode = CMDQ_OP_CFGI_STE,
1166                .cfgi   = {
1167                        .sid    = sid,
1168                        .leaf   = true,
1169                },
1170        };
1171
1172        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1173        arm_smmu_cmdq_issue_sync(smmu);
1174}
1175
1176static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1177                                      __le64 *dst)
1178{
1179        /*
1180         * This is hideously complicated, but we only really care about
1181         * three cases at the moment:
1182         *
1183         * 1. Invalid (all zero) -> bypass/fault (init)
1184         * 2. Bypass/fault -> translation/bypass (attach)
1185         * 3. Translation/bypass -> bypass/fault (detach)
1186         *
1187         * Given that we can't update the STE atomically and the SMMU
1188         * doesn't read the thing in a defined order, that leaves us
1189         * with the following maintenance requirements:
1190         *
1191         * 1. Update Config, return (init time STEs aren't live)
1192         * 2. Write everything apart from dword 0, sync, write dword 0, sync
1193         * 3. Update Config, sync
1194         */
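        /*
         * Case 2 above is why dword 0 (which holds V and Config) is always
         * written last, with a CFGI_STE+CMD_SYNC on either side: the SMMU
         * may have cached any part of the old STE, so the remaining dwords
         * must be visible and the old entry invalidated before the new
         * Config can legitimately be observed.
         */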
1195        u64 val = le64_to_cpu(dst[0]);
1196        bool ste_live = false;
1197        struct arm_smmu_device *smmu = NULL;
1198        struct arm_smmu_s1_cfg *s1_cfg = NULL;
1199        struct arm_smmu_s2_cfg *s2_cfg = NULL;
1200        struct arm_smmu_domain *smmu_domain = NULL;
1201        struct arm_smmu_cmdq_ent prefetch_cmd = {
1202                .opcode         = CMDQ_OP_PREFETCH_CFG,
1203                .prefetch       = {
1204                        .sid    = sid,
1205                },
1206        };
1207
1208        if (master) {
1209                smmu_domain = master->domain;
1210                smmu = master->smmu;
1211        }
1212
1213        if (smmu_domain) {
1214                switch (smmu_domain->stage) {
1215                case ARM_SMMU_DOMAIN_S1:
1216                        s1_cfg = &smmu_domain->s1_cfg;
1217                        break;
1218                case ARM_SMMU_DOMAIN_S2:
1219                case ARM_SMMU_DOMAIN_NESTED:
1220                        s2_cfg = &smmu_domain->s2_cfg;
1221                        break;
1222                default:
1223                        break;
1224                }
1225        }
1226
1227        if (val & STRTAB_STE_0_V) {
1228                switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1229                case STRTAB_STE_0_CFG_BYPASS:
1230                        break;
1231                case STRTAB_STE_0_CFG_S1_TRANS:
1232                case STRTAB_STE_0_CFG_S2_TRANS:
1233                        ste_live = true;
1234                        break;
1235                case STRTAB_STE_0_CFG_ABORT:
1236                        BUG_ON(!disable_bypass);
1237                        break;
1238                default:
1239                        BUG(); /* STE corruption */
1240                }
1241        }
1242
1243        /* Nuke the existing STE_0 value, as we're going to rewrite it */
1244        val = STRTAB_STE_0_V;
1245
1246        /* Bypass/fault */
1247        if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1248                if (!smmu_domain && disable_bypass)
1249                        val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1250                else
1251                        val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1252
1253                dst[0] = cpu_to_le64(val);
1254                dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1255                                                STRTAB_STE_1_SHCFG_INCOMING));
1256                dst[2] = 0; /* Nuke the VMID */
1257                /*
1258                 * The SMMU can perform negative caching, so we must sync
1259                 * the STE regardless of whether the old value was live.
1260                 */
1261                if (smmu)
1262                        arm_smmu_sync_ste_for_sid(smmu, sid);
1263                return;
1264        }
1265
1266        if (s1_cfg) {
1267                BUG_ON(ste_live);
1268                dst[1] = cpu_to_le64(
1269                         FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1270                         FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1271                         FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1272                         FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1273                         FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1274
1275                if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1276                   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1277                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1278
1279                val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1280                        FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1281                        FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1282                        FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1283        }
1284
1285        if (s2_cfg) {
1286                BUG_ON(ste_live);
1287                dst[2] = cpu_to_le64(
1288                         FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1289                         FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1290#ifdef __BIG_ENDIAN
1291                         STRTAB_STE_2_S2ENDI |
1292#endif
1293                         STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1294                         STRTAB_STE_2_S2R);
1295
1296                dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1297
1298                val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1299        }
1300
1301        if (master->ats_enabled)
1302                dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1303                                                 STRTAB_STE_1_EATS_TRANS));
1304
1305        arm_smmu_sync_ste_for_sid(smmu, sid);
1306        /* See comment in arm_smmu_write_ctx_desc() */
1307        WRITE_ONCE(dst[0], cpu_to_le64(val));
1308        arm_smmu_sync_ste_for_sid(smmu, sid);
1309
1310        /* It's likely that we'll want to use the new STE soon */
1311        if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1312                arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1313}
1314
1315static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1316{
1317        unsigned int i;
1318
1319        for (i = 0; i < nent; ++i) {
1320                arm_smmu_write_strtab_ent(NULL, -1, strtab);
1321                strtab += STRTAB_STE_DWORDS;
1322        }
1323}
1324
1325static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1326{
1327        size_t size;
1328        void *strtab;
1329        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1330        struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1331
1332        if (desc->l2ptr)
1333                return 0;
1334
1335        size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1336        strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1337
1338        desc->span = STRTAB_SPLIT + 1;
1339        desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1340                                          GFP_KERNEL);
1341        if (!desc->l2ptr) {
1342                dev_err(smmu->dev,
1343                        "failed to allocate l2 stream table for SID %u\n",
1344                        sid);
1345                return -ENOMEM;
1346        }
1347
1348        arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1349        arm_smmu_write_strtab_l1_desc(strtab, desc);
1350        return 0;
1351}
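/*
 * Sizing example for the lazily allocated level-2 stream tables above
 * (assuming the driver's STRTAB_SPLIT of 8 and 8-dword STEs): each leaf
 * covers 1 << STRTAB_SPLIT == 256 StreamIDs and occupies 256 * 64 bytes ==
 * 16KiB, and the matching L1 descriptor is written with span ==
 * STRTAB_SPLIT + 1 so the SMMU knows how many STEs the leaf holds.
 */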
1352
1353/* IRQ and event handlers */
1354static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1355{
1356        int i;
1357        struct arm_smmu_device *smmu = dev;
1358        struct arm_smmu_queue *q = &smmu->evtq.q;
1359        struct arm_smmu_ll_queue *llq = &q->llq;
1360        u64 evt[EVTQ_ENT_DWORDS];
1361
1362        do {
1363                while (!queue_remove_raw(q, evt)) {
1364                        u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1365
1366                        dev_info(smmu->dev, "event 0x%02x received:\n", id);
1367                        for (i = 0; i < ARRAY_SIZE(evt); ++i)
1368                                dev_info(smmu->dev, "\t0x%016llx\n",
1369                                         (unsigned long long)evt[i]);
1370
1371                }
1372
1373                /*
1374                 * Not much we can do on overflow, so scream and pretend we're
1375                 * trying harder.
1376                 */
1377                if (queue_sync_prod_in(q) == -EOVERFLOW)
1378                        dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1379        } while (!queue_empty(llq));
1380
1381        /* Sync our overflow flag, as we believe we're up to speed */
1382        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1383                    Q_IDX(llq, llq->cons);
1384        return IRQ_HANDLED;
1385}
1386
1387static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1388{
1389        u32 sid, ssid;
1390        u16 grpid;
1391        bool ssv, last;
1392
1393        sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1394        ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1395        ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1396        last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1397        grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1398
1399        dev_info(smmu->dev, "unexpected PRI request received:\n");
1400        dev_info(smmu->dev,
1401                 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1402                 sid, ssid, grpid, last ? "L" : "",
1403                 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1404                 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1405                 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1406                 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1407                 evt[1] & PRIQ_1_ADDR_MASK);
1408
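            /* PRI isn't handled, so deny the group on its last request */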
1409        if (last) {
1410                struct arm_smmu_cmdq_ent cmd = {
1411                        .opcode                 = CMDQ_OP_PRI_RESP,
1412                        .substream_valid        = ssv,
1413                        .pri                    = {
1414                                .sid    = sid,
1415                                .ssid   = ssid,
1416                                .grpid  = grpid,
1417                                .resp   = PRI_RESP_DENY,
1418                        },
1419                };
1420
1421                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1422        }
1423}
1424
1425static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1426{
1427        struct arm_smmu_device *smmu = dev;
1428        struct arm_smmu_queue *q = &smmu->priq.q;
1429        struct arm_smmu_ll_queue *llq = &q->llq;
1430        u64 evt[PRIQ_ENT_DWORDS];
1431
1432        do {
1433                while (!queue_remove_raw(q, evt))
1434                        arm_smmu_handle_ppr(smmu, evt);
1435
1436                if (queue_sync_prod_in(q) == -EOVERFLOW)
1437                        dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1438        } while (!queue_empty(llq));
1439
1440        /* Sync our overflow flag, as we believe we're up to speed */
1441        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1442                      Q_IDX(llq, llq->cons);
1443        queue_sync_cons_out(q);
1444        return IRQ_HANDLED;
1445}
1446
1447static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1448
1449static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1450{
1451        u32 gerror, gerrorn, active;
1452        struct arm_smmu_device *smmu = dev;
1453
1454        gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1455        gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1456
1457        active = gerror ^ gerrorn;
1458        if (!(active & GERROR_ERR_MASK))
1459                return IRQ_NONE; /* No errors pending */
1460
1461        dev_warn(smmu->dev,
1462                 "unexpected global error reported (0x%08x), this could be serious\n",
1463                 active);
1464
1465        if (active & GERROR_SFM_ERR) {
1466                dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1467                arm_smmu_device_disable(smmu);
1468        }
1469
1470        if (active & GERROR_MSI_GERROR_ABT_ERR)
1471                dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1472
1473        if (active & GERROR_MSI_PRIQ_ABT_ERR)
1474                dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1475
1476        if (active & GERROR_MSI_EVTQ_ABT_ERR)
1477                dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1478
1479        if (active & GERROR_MSI_CMDQ_ABT_ERR)
1480                dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1481
1482        if (active & GERROR_PRIQ_ABT_ERR)
1483                dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1484
1485        if (active & GERROR_EVTQ_ABT_ERR)
1486                dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1487
1488        if (active & GERROR_CMDQ_ERR)
1489                arm_smmu_cmdq_skip_err(smmu);
1490
1491        writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1492        return IRQ_HANDLED;
1493}
1494
1495static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1496{
1497        struct arm_smmu_device *smmu = dev;
1498
1499        arm_smmu_evtq_thread(irq, dev);
1500        if (smmu->features & ARM_SMMU_FEAT_PRI)
1501                arm_smmu_priq_thread(irq, dev);
1502
1503        return IRQ_HANDLED;
1504}
1505
1506static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1507{
1508        arm_smmu_gerror_handler(irq, dev);
1509        return IRQ_WAKE_THREAD;
1510}
1511
1512static void
1513arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1514                        struct arm_smmu_cmdq_ent *cmd)
1515{
1516        size_t log2_span;
1517        size_t span_mask;
1518        /* ATC invalidates are always on 4096-byte pages */
1519        size_t inval_grain_shift = 12;
1520        unsigned long page_start, page_end;
1521
1522        *cmd = (struct arm_smmu_cmdq_ent) {
1523                .opcode                 = CMDQ_OP_ATC_INV,
1524                .substream_valid        = !!ssid,
1525                .atc.ssid               = ssid,
1526        };
1527
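            /* No size: use ATC_INV_SIZE_ALL to invalidate the whole ATC */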
1528        if (!size) {
1529                cmd->atc.size = ATC_INV_SIZE_ALL;
1530                return;
1531        }
1532
1533        page_start      = iova >> inval_grain_shift;
1534        page_end        = (iova + size - 1) >> inval_grain_shift;
1535
1536        /*
1537         * In an ATS Invalidate Request, the address must be aligned on the
1538         * range size, which must be a power-of-two number of pages. We
1539         * thus have to choose between grossly over-invalidating the region, or
1540         * splitting the invalidation into multiple commands. For simplicity
1541         * we'll go with the first solution, but should refine it in the future
1542         * if multiple commands are shown to be more efficient.
1543         *
1544         * Find the smallest power of two that covers the range. The most
1545         * significant differing bit between the start and end addresses,
1546         * fls(start ^ end), indicates the required span. For example:
1547         *
1548         * We want to invalidate pages [8; 11]. This is already the ideal range:
1549         *              x = 0b1000 ^ 0b1011 = 0b11
1550         *              span = 1 << fls(x) = 4
1551         *
1552         * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1553         *              x = 0b0111 ^ 0b1010 = 0b1101
1554         *              span = 1 << fls(x) = 16
1555         */
1556        log2_span       = fls_long(page_start ^ page_end);
1557        span_mask       = (1ULL << log2_span) - 1;
1558
1559        page_start      &= ~span_mask;
1560
1561        cmd->atc.addr   = page_start << inval_grain_shift;
1562        cmd->atc.size   = log2_span;
1563}
1564
1565static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1566{
1567        int i;
1568        struct arm_smmu_cmdq_ent cmd;
1569
1570        arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1571
1572        for (i = 0; i < master->num_sids; i++) {
1573                cmd.atc.sid = master->sids[i];
1574                arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1575        }
1576
1577        return arm_smmu_cmdq_issue_sync(master->smmu);
1578}
1579
1580static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1581                                   int ssid, unsigned long iova, size_t size)
1582{
1583        int i;
1584        unsigned long flags;
1585        struct arm_smmu_cmdq_ent cmd;
1586        struct arm_smmu_master *master;
1587        struct arm_smmu_cmdq_batch cmds = {};
1588
1589        if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1590                return 0;
1591
1592        /*
1593         * Ensure that we've completed prior invalidation of the main TLBs
1594         * before we read 'nr_ats_masters' in case of a concurrent call to
1595         * arm_smmu_enable_ats():
1596         *
1597         *      // unmap()                      // arm_smmu_enable_ats()
1598         *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1599         *      smp_mb();                       [...]
1600         *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1601         *
1602         * Ensures that we always see the incremented 'nr_ats_masters' count if
1603         * ATS was enabled at the PCI device before completion of the TLBI.
1604         */
1605        smp_mb();
1606        if (!atomic_read(&smmu_domain->nr_ats_masters))
1607                return 0;
1608
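            /*
             * Build the ATC_INV command once, then queue a copy for every SID
             * of each ATS-enabled master so that a single submission (and
             * CMD_SYNC) covers them all.
             */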
1609        arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1610
1611        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1612        list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1613                if (!master->ats_enabled)
1614                        continue;
1615
1616                for (i = 0; i < master->num_sids; i++) {
1617                        cmd.atc.sid = master->sids[i];
1618                        arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1619                }
1620        }
1621        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1622
1623        return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1624}
1625
1626/* IO_PGTABLE API */
1627static void arm_smmu_tlb_inv_context(void *cookie)
1628{
1629        struct arm_smmu_domain *smmu_domain = cookie;
1630        struct arm_smmu_device *smmu = smmu_domain->smmu;
1631        struct arm_smmu_cmdq_ent cmd;
1632
1633        /*
1634         * NOTE: when io-pgtable is in non-strict mode, we may get here with
1635         * PTEs previously cleared by unmaps on the current CPU not yet visible
1636         * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1637         * insertion to guarantee those are observed before the TLBI. Do be
1638         * careful, 007.
1639         */
1640        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1641                arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1642        } else {
1643                cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1644                cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1645                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1646                arm_smmu_cmdq_issue_sync(smmu);
1647        }
1648        arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1649}
1650
1651static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1652                                   size_t granule, bool leaf,
1653                                   struct arm_smmu_domain *smmu_domain)
1654{
1655        struct arm_smmu_device *smmu = smmu_domain->smmu;
1656        unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1657        size_t inv_range = granule;
1658        struct arm_smmu_cmdq_batch cmds = {};
1659        struct arm_smmu_cmdq_ent cmd = {
1660                .tlbi = {
1661                        .leaf   = leaf,
1662                },
1663        };
1664
1665        if (!size)
1666                return;
1667
1668        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1669                cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1670                cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1671        } else {
1672                cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1673                cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1674        }
1675
1676        if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1677                /* Get the leaf page size */
1678                tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1679
1680                /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1681                cmd.tlbi.tg = (tg - 10) / 2;
1682
1683                /* Determine what level the granule is at */
1684                cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1685
1686                num_pages = size >> tg;
1687        }
1688
1689        while (iova < end) {
1690                if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1691                        /*
1692                         * On each iteration of the loop, the range is 5 bits
1693                         * worth of the aligned size remaining.
1694                         * The range in pages is:
1695                         *
1696                         * range = (num_pages & (0x1f << __ffs(num_pages)))
1697                         */
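                            /*
                             * For example, with a 4K granule and num_pages
                             * equal to 35 (0b100011):
                             *   1st iteration: scale = 0, num = 3 ->  3 pages
                             *   2nd iteration: scale = 5, num = 1 -> 32 pages
                             */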
1698                        unsigned long scale, num;
1699
1700                        /* Determine the power-of-2 page multiple (scale is its log2) */
1701                        scale = __ffs(num_pages);
1702                        cmd.tlbi.scale = scale;
1703
1704                        /* Determine how many chunks of 2^scale size we have */
1705                        num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1706                        cmd.tlbi.num = num - 1;
1707
1708                        /* range is num * 2^scale * pgsize */
1709                        inv_range = num << (scale + tg);
1710
1711                        /* Clear out the lower order bits for the next iteration */
1712                        num_pages -= num << scale;
1713                }
1714
1715                cmd.tlbi.addr = iova;
1716                arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1717                iova += inv_range;
1718        }
1719        arm_smmu_cmdq_batch_submit(smmu, &cmds);
1720
1721        /*
1722         * Unfortunately, this can't be leaf-only since we may have
1723         * zapped an entire table.
1724         */
1725        arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1726}
1727
1728static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1729                                         unsigned long iova, size_t granule,
1730                                         void *cookie)
1731{
1732        struct arm_smmu_domain *smmu_domain = cookie;
1733        struct iommu_domain *domain = &smmu_domain->domain;
1734
1735        iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1736}
1737
1738static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1739                                  size_t granule, void *cookie)
1740{
1741        arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1742}
1743
1744static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
1745                                  size_t granule, void *cookie)
1746{
1747        arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
1748}
1749
1750static const struct iommu_flush_ops arm_smmu_flush_ops = {
1751        .tlb_flush_all  = arm_smmu_tlb_inv_context,
1752        .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1753        .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
1754        .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1755};
1756
1757/* IOMMU API */
1758static bool arm_smmu_capable(enum iommu_cap cap)
1759{
1760        switch (cap) {
1761        case IOMMU_CAP_CACHE_COHERENCY:
1762                return true;
1763        case IOMMU_CAP_NOEXEC:
1764                return true;
1765        default:
1766                return false;
1767        }
1768}
1769
1770static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1771{
1772        struct arm_smmu_domain *smmu_domain;
1773
1774        if (type != IOMMU_DOMAIN_UNMANAGED &&
1775            type != IOMMU_DOMAIN_DMA &&
1776            type != IOMMU_DOMAIN_IDENTITY)
1777                return NULL;
1778
1779        /*
1780         * Allocate the domain and initialise some of its data structures.
1781         * We can't really do anything meaningful until we've added a
1782         * master.
1783         */
1784        smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1785        if (!smmu_domain)
1786                return NULL;
1787
1788        if (type == IOMMU_DOMAIN_DMA &&
1789            iommu_get_dma_cookie(&smmu_domain->domain)) {
1790                kfree(smmu_domain);
1791                return NULL;
1792        }
1793
1794        mutex_init(&smmu_domain->init_mutex);
1795        INIT_LIST_HEAD(&smmu_domain->devices);
1796        spin_lock_init(&smmu_domain->devices_lock);
1797
1798        return &smmu_domain->domain;
1799}
1800
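    /*
     * Lock-free bitmap allocator: find a clear bit and claim it with
     * test_and_set_bit(), retrying if another CPU takes it first.
     */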
1801static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1802{
1803        int idx, size = 1 << span;
1804
1805        do {
1806                idx = find_first_zero_bit(map, size);
1807                if (idx == size)
1808                        return -ENOSPC;
1809        } while (test_and_set_bit(idx, map));
1810
1811        return idx;
1812}
1813
1814static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1815{
1816        clear_bit(idx, map);
1817}
1818
1819static void arm_smmu_domain_free(struct iommu_domain *domain)
1820{
1821        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1822        struct arm_smmu_device *smmu = smmu_domain->smmu;
1823
1824        iommu_put_dma_cookie(domain);
1825        free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1826
1827        /* Free the CD and ASID, if we allocated them */
1828        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1829                struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1830
1831                /* Prevent SVA from touching the CD while we're freeing it */
1832                mutex_lock(&arm_smmu_asid_lock);
1833                if (cfg->cdcfg.cdtab)
1834                        arm_smmu_free_cd_tables(smmu_domain);
1835                arm_smmu_free_asid(&cfg->cd);
1836                mutex_unlock(&arm_smmu_asid_lock);
1837        } else {
1838                struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1839                if (cfg->vmid)
1840                        arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1841        }
1842
1843        kfree(smmu_domain);
1844}
1845
1846static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1847                                       struct arm_smmu_master *master,
1848                                       struct io_pgtable_cfg *pgtbl_cfg)
1849{
1850        int ret;
1851        u32 asid;
1852        struct arm_smmu_device *smmu = smmu_domain->smmu;
1853        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1854        typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1855
1856        refcount_set(&cfg->cd.refs, 1);
1857
1858        /* Prevent SVA from modifying the ASID until it is written to the CD */
1859        mutex_lock(&arm_smmu_asid_lock);
1860        ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1861                       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1862        if (ret)
1863                goto out_unlock;
1864
1865        cfg->s1cdmax = master->ssid_bits;
1866
1867        ret = arm_smmu_alloc_cd_tables(smmu_domain);
1868        if (ret)
1869                goto out_free_asid;
1870
1871        cfg->cd.asid    = (u16)asid;
1872        cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1873        cfg->cd.tcr     = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1874                          FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1875                          FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1876                          FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1877                          FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1878                          FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1879                          CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1880        cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair;
1881
1882        /*
1883         * Note that this will end up calling arm_smmu_sync_cd() before
1884         * the master has been added to the devices list for this domain.
1885         * This isn't an issue because the STE hasn't been installed yet.
1886         */
1887        ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1888        if (ret)
1889                goto out_free_cd_tables;
1890
1891        mutex_unlock(&arm_smmu_asid_lock);
1892        return 0;
1893
1894out_free_cd_tables:
1895        arm_smmu_free_cd_tables(smmu_domain);
1896out_free_asid:
1897        arm_smmu_free_asid(&cfg->cd);
1898out_unlock:
1899        mutex_unlock(&arm_smmu_asid_lock);
1900        return ret;
1901}
1902
1903static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1904                                       struct arm_smmu_master *master,
1905                                       struct io_pgtable_cfg *pgtbl_cfg)
1906{
1907        int vmid;
1908        struct arm_smmu_device *smmu = smmu_domain->smmu;
1909        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1910        typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1911
1912        vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1913        if (vmid < 0)
1914                return vmid;
1915
1916        vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1917        cfg->vmid       = (u16)vmid;
1918        cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1919        cfg->vtcr       = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1920                          FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1921                          FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1922                          FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1923                          FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1924                          FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1925                          FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1926        return 0;
1927}
1928
1929static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1930                                    struct arm_smmu_master *master)
1931{
1932        int ret;
1933        unsigned long ias, oas;
1934        enum io_pgtable_fmt fmt;
1935        struct io_pgtable_cfg pgtbl_cfg;
1936        struct io_pgtable_ops *pgtbl_ops;
1937        int (*finalise_stage_fn)(struct arm_smmu_domain *,
1938                                 struct arm_smmu_master *,
1939                                 struct io_pgtable_cfg *);
1940        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1941        struct arm_smmu_device *smmu = smmu_domain->smmu;
1942
1943        if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1944                smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1945                return 0;
1946        }
1947
1948        /* Restrict the stage to what we can actually support */
1949        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1950                smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1951        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1952                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1953
1954        switch (smmu_domain->stage) {
1955        case ARM_SMMU_DOMAIN_S1:
1956                ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1957                ias = min_t(unsigned long, ias, VA_BITS);
1958                oas = smmu->ias;
1959                fmt = ARM_64_LPAE_S1;
1960                finalise_stage_fn = arm_smmu_domain_finalise_s1;
1961                break;
1962        case ARM_SMMU_DOMAIN_NESTED:
1963        case ARM_SMMU_DOMAIN_S2:
1964                ias = smmu->ias;
1965                oas = smmu->oas;
1966                fmt = ARM_64_LPAE_S2;
1967                finalise_stage_fn = arm_smmu_domain_finalise_s2;
1968                break;
1969        default:
1970                return -EINVAL;
1971        }
1972
1973        pgtbl_cfg = (struct io_pgtable_cfg) {
1974                .pgsize_bitmap  = smmu->pgsize_bitmap,
1975                .ias            = ias,
1976                .oas            = oas,
1977                .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
1978                .tlb            = &arm_smmu_flush_ops,
1979                .iommu_dev      = smmu->dev,
1980        };
1981
1982        if (smmu_domain->non_strict)
1983                pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1984
1985        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1986        if (!pgtbl_ops)
1987                return -ENOMEM;
1988
1989        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1990        domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1991        domain->geometry.force_aperture = true;
1992
1993        ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
1994        if (ret < 0) {
1995                free_io_pgtable_ops(pgtbl_ops);
1996                return ret;
1997        }
1998
1999        smmu_domain->pgtbl_ops = pgtbl_ops;
2000        return 0;
2001}
2002
2003static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2004{
2005        __le64 *step;
2006        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2007
2008        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2009                struct arm_smmu_strtab_l1_desc *l1_desc;
2010                int idx;
2011
2012                /* Two-level walk */
2013                idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2014                l1_desc = &cfg->l1_desc[idx];
2015                idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2016                step = &l1_desc->l2ptr[idx];
2017        } else {
2018                /* Simple linear lookup */
2019                step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2020        }
2021
2022        return step;
2023}
2024
2025static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2026{
2027        int i, j;
2028        struct arm_smmu_device *smmu = master->smmu;
2029
2030        for (i = 0; i < master->num_sids; ++i) {
2031                u32 sid = master->sids[i];
2032                __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2033
2034                /* Bridged PCI devices may end up with duplicated IDs */
2035                for (j = 0; j < i; j++)
2036                        if (master->sids[j] == sid)
2037                                break;
2038                if (j < i)
2039                        continue;
2040
2041                arm_smmu_write_strtab_ent(master, sid, step);
2042        }
2043}
2044
2045static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2046{
2047        struct device *dev = master->dev;
2048        struct arm_smmu_device *smmu = master->smmu;
2049        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2050
2051        if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2052                return false;
2053
2054        if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2055                return false;
2056
2057        return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2058}
2059
2060static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2061{
2062        size_t stu;
2063        struct pci_dev *pdev;
2064        struct arm_smmu_device *smmu = master->smmu;
2065        struct arm_smmu_domain *smmu_domain = master->domain;
2066
2067        /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2068        if (!master->ats_enabled)
2069                return;
2070
2071        /* Smallest Translation Unit: log2 of the smallest supported granule */
2072        stu = __ffs(smmu->pgsize_bitmap);
2073        pdev = to_pci_dev(master->dev);
2074
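            /*
             * Bump nr_ats_masters and clean the ATC before ATS is enabled at
             * the endpoint; this pairs with the ordering described in
             * arm_smmu_atc_inv_domain().
             */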
2075        atomic_inc(&smmu_domain->nr_ats_masters);
2076        arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2077        if (pci_enable_ats(pdev, stu))
2078                dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2079}
2080
2081static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2082{
2083        struct arm_smmu_domain *smmu_domain = master->domain;
2084
2085        if (!master->ats_enabled)
2086                return;
2087
2088        pci_disable_ats(to_pci_dev(master->dev));
2089        /*
2090         * Ensure ATS is disabled at the endpoint before we issue the
2091         * ATC invalidation via the SMMU.
2092         */
2093        wmb();
2094        arm_smmu_atc_inv_master(master);
2095        atomic_dec(&smmu_domain->nr_ats_masters);
2096}
2097
2098static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2099{
2100        int ret;
2101        int features;
2102        int num_pasids;
2103        struct pci_dev *pdev;
2104
2105        if (!dev_is_pci(master->dev))
2106                return -ENODEV;
2107
2108        pdev = to_pci_dev(master->dev);
2109
2110        features = pci_pasid_features(pdev);
2111        if (features < 0)
2112                return features;
2113
2114        num_pasids = pci_max_pasids(pdev);
2115        if (num_pasids <= 0)
2116                return num_pasids;
2117
2118        ret = pci_enable_pasid(pdev, features);
2119        if (ret) {
2120                dev_err(&pdev->dev, "Failed to enable PASID\n");
2121                return ret;
2122        }
2123
2124        master->ssid_bits = min_t(u8, ilog2(num_pasids),
2125                                  master->smmu->ssid_bits);
2126        return 0;
2127}
2128
2129static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2130{
2131        struct pci_dev *pdev;
2132
2133        if (!dev_is_pci(master->dev))
2134                return;
2135
2136        pdev = to_pci_dev(master->dev);
2137
2138        if (!pdev->pasid_enabled)
2139                return;
2140
2141        master->ssid_bits = 0;
2142        pci_disable_pasid(pdev);
2143}
2144
2145static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2146{
2147        unsigned long flags;
2148        struct arm_smmu_domain *smmu_domain = master->domain;
2149
2150        if (!smmu_domain)
2151                return;
2152
2153        arm_smmu_disable_ats(master);
2154
2155        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2156        list_del(&master->domain_head);
2157        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2158
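            /*
             * Clearing the domain and rewriting the STEs reverts the master
             * to bypass or abort, depending on disable_bypass.
             */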
2159        master->domain = NULL;
2160        master->ats_enabled = false;
2161        arm_smmu_install_ste_for_dev(master);
2162}
2163
2164static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2165{
2166        int ret = 0;
2167        unsigned long flags;
2168        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2169        struct arm_smmu_device *smmu;
2170        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2171        struct arm_smmu_master *master;
2172
2173        if (!fwspec)
2174                return -ENOENT;
2175
2176        master = dev_iommu_priv_get(dev);
2177        smmu = master->smmu;
2178
2179        /*
2180         * Checking that SVA is disabled ensures that this device isn't bound to
2181         * any mm, and can be safely detached from its old domain. Bonds cannot
2182         * be removed concurrently since we're holding the group mutex.
2183         */
2184        if (arm_smmu_master_sva_enabled(master)) {
2185                dev_err(dev, "cannot attach - SVA enabled\n");
2186                return -EBUSY;
2187        }
2188
2189        arm_smmu_detach_dev(master);
2190
2191        mutex_lock(&smmu_domain->init_mutex);
2192
2193        if (!smmu_domain->smmu) {
2194                smmu_domain->smmu = smmu;
2195                ret = arm_smmu_domain_finalise(domain, master);
2196                if (ret) {
2197                        smmu_domain->smmu = NULL;
2198                        goto out_unlock;
2199                }
2200        } else if (smmu_domain->smmu != smmu) {
2201                dev_err(dev,
2202                        "cannot attach to SMMU %s (upstream of %s)\n",
2203                        dev_name(smmu_domain->smmu->dev),
2204                        dev_name(smmu->dev));
2205                ret = -ENXIO;
2206                goto out_unlock;
2207        } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2208                   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2209                dev_err(dev,
2210                        "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2211                        smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2212                ret = -EINVAL;
2213                goto out_unlock;
2214        }
2215
2216        master->domain = smmu_domain;
2217
2218        if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2219                master->ats_enabled = arm_smmu_ats_supported(master);
2220
2221        arm_smmu_install_ste_for_dev(master);
2222
2223        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2224        list_add(&master->domain_head, &smmu_domain->devices);
2225        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2226
2227        arm_smmu_enable_ats(master);
2228
2229out_unlock:
2230        mutex_unlock(&smmu_domain->init_mutex);
2231        return ret;
2232}
2233
2234static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2235                        phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2236{
2237        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2238
2239        if (!ops)
2240                return -ENODEV;
2241
2242        return ops->map(ops, iova, paddr, size, prot, gfp);
2243}
2244
2245static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2246                             size_t size, struct iommu_iotlb_gather *gather)
2247{
2248        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2249        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2250
2251        if (!ops)
2252                return 0;
2253
2254        return ops->unmap(ops, iova, size, gather);
2255}
2256
2257static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2258{
2259        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2260
2261        if (smmu_domain->smmu)
2262                arm_smmu_tlb_inv_context(smmu_domain);
2263}
2264
2265static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2266                                struct iommu_iotlb_gather *gather)
2267{
2268        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2269
2270        arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2271                               gather->pgsize, true, smmu_domain);
2272}
2273
2274static phys_addr_t
2275arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2276{
2277        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2278
2279        if (domain->type == IOMMU_DOMAIN_IDENTITY)
2280                return iova;
2281
2282        if (!ops)
2283                return 0;
2284
2285        return ops->iova_to_phys(ops, iova);
2286}
2287
2288static struct platform_driver arm_smmu_driver;
2289
2290static
2291struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2292{
2293        struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2294                                                          fwnode);
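            /* driver_find_device_by_fwnode() took a reference we don't need */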
2295        put_device(dev);
2296        return dev ? dev_get_drvdata(dev) : NULL;
2297}
2298
2299static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2300{
2301        unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2302
2303        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2304                limit *= 1UL << STRTAB_SPLIT;
2305
2306        return sid < limit;
2307}
2308
2309static struct iommu_ops arm_smmu_ops;
2310
2311static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2312{
2313        int i, ret;
2314        struct arm_smmu_device *smmu;
2315        struct arm_smmu_master *master;
2316        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2317
2318        if (!fwspec || fwspec->ops != &arm_smmu_ops)
2319                return ERR_PTR(-ENODEV);
2320
2321        if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2322                return ERR_PTR(-EBUSY);
2323
2324        smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2325        if (!smmu)
2326                return ERR_PTR(-ENODEV);
2327
2328        master = kzalloc(sizeof(*master), GFP_KERNEL);
2329        if (!master)
2330                return ERR_PTR(-ENOMEM);
2331
2332        master->dev = dev;
2333        master->smmu = smmu;
2334        master->sids = fwspec->ids;
2335        master->num_sids = fwspec->num_ids;
2336        INIT_LIST_HEAD(&master->bonds);
2337        dev_iommu_priv_set(dev, master);
2338
2339        /* Check the SIDs are in range of the SMMU and our stream table */
2340        for (i = 0; i < master->num_sids; i++) {
2341                u32 sid = master->sids[i];
2342
2343                if (!arm_smmu_sid_in_range(smmu, sid)) {
2344                        ret = -ERANGE;
2345                        goto err_free_master;
2346                }
2347
2348                /* Ensure l2 strtab is initialised */
2349                if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2350                        ret = arm_smmu_init_l2_strtab(smmu, sid);
2351                        if (ret)
2352                                goto err_free_master;
2353                }
2354        }
2355
2356        master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2357
2358        /*
2359         * Note that PASID must be enabled before, and disabled after ATS:
2360         * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2361         *
2362         *   Behavior is undefined if this bit is Set and the value of the PASID
2363         *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2364         *   are changed.
2365         */
2366        arm_smmu_enable_pasid(master);
2367
2368        if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2369                master->ssid_bits = min_t(u8, master->ssid_bits,
2370                                          CTXDESC_LINEAR_CDMAX);
2371
2372        return &smmu->iommu;
2373
2374err_free_master:
2375        kfree(master);
2376        dev_iommu_priv_set(dev, NULL);
2377        return ERR_PTR(ret);
2378}
2379
2380static void arm_smmu_release_device(struct device *dev)
2381{
2382        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2383        struct arm_smmu_master *master;
2384
2385        if (!fwspec || fwspec->ops != &arm_smmu_ops)
2386                return;
2387
2388        master = dev_iommu_priv_get(dev);
2389        WARN_ON(arm_smmu_master_sva_enabled(master));
2390        arm_smmu_detach_dev(master);
2391        arm_smmu_disable_pasid(master);
2392        kfree(master);
2393        iommu_fwspec_free(dev);
2394}
2395
2396static struct iommu_group *arm_smmu_device_group(struct device *dev)
2397{
2398        struct iommu_group *group;
2399
2400        /*
2401         * We don't support devices sharing stream IDs other than PCI RID
2402         * aliases, since the necessary ID-to-device lookup becomes rather
2403         * impractical given a potential sparse 32-bit stream ID space.
2404         */
2405        if (dev_is_pci(dev))
2406                group = pci_device_group(dev);
2407        else
2408                group = generic_device_group(dev);
2409
2410        return group;
2411}
2412
2413static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2414                                    enum iommu_attr attr, void *data)
2415{
2416        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2417
2418        switch (domain->type) {
2419        case IOMMU_DOMAIN_UNMANAGED:
2420                switch (attr) {
2421                case DOMAIN_ATTR_NESTING:
2422                        *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2423                        return 0;
2424                default:
2425                        return -ENODEV;
2426                }
2427                break;
2428        case IOMMU_DOMAIN_DMA:
2429                switch (attr) {
2430                case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2431                        *(int *)data = smmu_domain->non_strict;
2432                        return 0;
2433                default:
2434                        return -ENODEV;
2435                }
2436                break;
2437        default:
2438                return -EINVAL;
2439        }
2440}
2441
2442static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2443                                    enum iommu_attr attr, void *data)
2444{
2445        int ret = 0;
2446        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2447
2448        mutex_lock(&smmu_domain->init_mutex);
2449
2450        switch (domain->type) {
2451        case IOMMU_DOMAIN_UNMANAGED:
2452                switch (attr) {
2453                case DOMAIN_ATTR_NESTING:
2454                        if (smmu_domain->smmu) {
2455                                ret = -EPERM;
2456                                goto out_unlock;
2457                        }
2458
2459                        if (*(int *)data)
2460                                smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2461                        else
2462                                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2463                        break;
2464                default:
2465                        ret = -ENODEV;
2466                }
2467                break;
2468        case IOMMU_DOMAIN_DMA:
2469                switch(attr) {
2470                case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2471                        smmu_domain->non_strict = *(int *)data;
2472                        break;
2473                default:
2474                        ret = -ENODEV;
2475                }
2476                break;
2477        default:
2478                ret = -EINVAL;
2479        }
2480
2481out_unlock:
2482        mutex_unlock(&smmu_domain->init_mutex);
2483        return ret;
2484}
2485
2486static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2487{
2488        return iommu_fwspec_add_ids(dev, args->args, 1);
2489}
2490
2491static void arm_smmu_get_resv_regions(struct device *dev,
2492                                      struct list_head *head)
2493{
2494        struct iommu_resv_region *region;
2495        int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2496
2497        region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2498                                         prot, IOMMU_RESV_SW_MSI);
2499        if (!region)
2500                return;
2501
2502        list_add_tail(&region->list, head);
2503
2504        iommu_dma_get_resv_regions(dev, head);
2505}
2506
2507static bool arm_smmu_dev_has_feature(struct device *dev,
2508                                     enum iommu_dev_features feat)
2509{
2510        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2511
2512        if (!master)
2513                return false;
2514
2515        switch (feat) {
2516        case IOMMU_DEV_FEAT_SVA:
2517                return arm_smmu_master_sva_supported(master);
2518        default:
2519                return false;
2520        }
2521}
2522
2523static bool arm_smmu_dev_feature_enabled(struct device *dev,
2524                                         enum iommu_dev_features feat)
2525{
2526        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2527
2528        if (!master)
2529                return false;
2530
2531        switch (feat) {
2532        case IOMMU_DEV_FEAT_SVA:
2533                return arm_smmu_master_sva_enabled(master);
2534        default:
2535                return false;
2536        }
2537}
2538
2539static int arm_smmu_dev_enable_feature(struct device *dev,
2540                                       enum iommu_dev_features feat)
2541{
2542        if (!arm_smmu_dev_has_feature(dev, feat))
2543                return -ENODEV;
2544
2545        if (arm_smmu_dev_feature_enabled(dev, feat))
2546                return -EBUSY;
2547
2548        switch (feat) {
2549        case IOMMU_DEV_FEAT_SVA:
2550                return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2551        default:
2552                return -EINVAL;
2553        }
2554}
2555
2556static int arm_smmu_dev_disable_feature(struct device *dev,
2557                                        enum iommu_dev_features feat)
2558{
2559        if (!arm_smmu_dev_feature_enabled(dev, feat))
2560                return -EINVAL;
2561
2562        switch (feat) {
2563        case IOMMU_DEV_FEAT_SVA:
2564                return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2565        default:
2566                return -EINVAL;
2567        }
2568}
2569
2570static struct iommu_ops arm_smmu_ops = {
2571        .capable                = arm_smmu_capable,
2572        .domain_alloc           = arm_smmu_domain_alloc,
2573        .domain_free            = arm_smmu_domain_free,
2574        .attach_dev             = arm_smmu_attach_dev,
2575        .map                    = arm_smmu_map,
2576        .unmap                  = arm_smmu_unmap,
2577        .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2578        .iotlb_sync             = arm_smmu_iotlb_sync,
2579        .iova_to_phys           = arm_smmu_iova_to_phys,
2580        .probe_device           = arm_smmu_probe_device,
2581        .release_device         = arm_smmu_release_device,
2582        .device_group           = arm_smmu_device_group,
2583        .domain_get_attr        = arm_smmu_domain_get_attr,
2584        .domain_set_attr        = arm_smmu_domain_set_attr,
2585        .of_xlate               = arm_smmu_of_xlate,
2586        .get_resv_regions       = arm_smmu_get_resv_regions,
2587        .put_resv_regions       = generic_iommu_put_resv_regions,
2588        .dev_has_feat           = arm_smmu_dev_has_feature,
2589        .dev_feat_enabled       = arm_smmu_dev_feature_enabled,
2590        .dev_enable_feat        = arm_smmu_dev_enable_feature,
2591        .dev_disable_feat       = arm_smmu_dev_disable_feature,
2592        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2593};
2594
2595/* Probing and initialisation functions */
2596static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2597                                   struct arm_smmu_queue *q,
2598                                   unsigned long prod_off,
2599                                   unsigned long cons_off,
2600                                   size_t dwords, const char *name)
2601{
2602        size_t qsz;
2603
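            /*
             * Try to allocate the queue at the requested size; on failure,
             * keep halving it until the allocation succeeds or the size
             * drops below a page.
             */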
2604        do {
2605                qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2606                q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2607                                              GFP_KERNEL);
2608                if (q->base || qsz < PAGE_SIZE)
2609                        break;
2610
2611                q->llq.max_n_shift--;
2612        } while (1);
2613
2614        if (!q->base) {
2615                dev_err(smmu->dev,
2616                        "failed to allocate queue (0x%zx bytes) for %s\n",
2617                        qsz, name);
2618                return -ENOMEM;
2619        }
2620
2621        if (!WARN_ON(q->base_dma & (qsz - 1))) {
2622                dev_info(smmu->dev, "allocated %u entries for %s\n",
2623                         1 << q->llq.max_n_shift, name);
2624        }
2625
2626        q->prod_reg     = arm_smmu_page1_fixup(prod_off, smmu);
2627        q->cons_reg     = arm_smmu_page1_fixup(cons_off, smmu);
2628        q->ent_dwords   = dwords;
2629
2630        q->q_base  = Q_BASE_RWA;
2631        q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2632        q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2633
2634        q->llq.prod = q->llq.cons = 0;
2635        return 0;
2636}
2637
2638static void arm_smmu_cmdq_free_bitmap(void *data)
2639{
2640        unsigned long *bitmap = data;
2641        bitmap_free(bitmap);
2642}
2643
2644static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2645{
2646        int ret = 0;
2647        struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2648        unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2649        atomic_long_t *bitmap;
2650
2651        atomic_set(&cmdq->owner_prod, 0);
2652        atomic_set(&cmdq->lock, 0);
2653
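            /*
             * One bit per command queue entry; the lock-free insertion path
             * uses this to track which entries currently hold valid commands.
             */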
2654        bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2655        if (!bitmap) {
2656                dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2657                ret = -ENOMEM;
2658        } else {
2659                cmdq->valid_map = bitmap;
2660                devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2661        }
2662
2663        return ret;
2664}
2665
2666static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2667{
2668        int ret;
2669
2670        /* cmdq */
2671        ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2672                                      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2673                                      "cmdq");
2674        if (ret)
2675                return ret;
2676
2677        ret = arm_smmu_cmdq_init(smmu);
2678        if (ret)
2679                return ret;
2680
2681        /* evtq */
2682        ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2683                                      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2684                                      "evtq");
2685        if (ret)
2686                return ret;
2687
2688        /* priq */
2689        if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2690                return 0;
2691
2692        return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2693                                       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2694                                       "priq");
2695}
2696
2697static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2698{
2699        unsigned int i;
2700        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2701        size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2702        void *strtab = smmu->strtab_cfg.strtab;
2703
2704        cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2705        if (!cfg->l1_desc) {
2706                dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2707                return -ENOMEM;
2708        }
2709
2710        for (i = 0; i < cfg->num_l1_ents; ++i) {
2711                arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2712                strtab += STRTAB_L1_DESC_DWORDS << 3;
2713        }
2714
2715        return 0;
2716}
2717
2718static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2719{
2720        void *strtab;
2721        u64 reg;
2722        u32 size, l1size;
2723        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2724
2725        /* Calculate the L1 size, capped to the SIDSIZE. */
2726        size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2727        size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2728        cfg->num_l1_ents = 1 << size;
2729
2730        size += STRTAB_SPLIT;
2731        if (size < smmu->sid_bits)
2732                dev_warn(smmu->dev,
2733                         "2-level strtab only covers %u/%u bits of SID\n",
2734                         size, smmu->sid_bits);
2735
2736        l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2737        strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2738                                     GFP_KERNEL);
2739        if (!strtab) {
2740                dev_err(smmu->dev,
2741                        "failed to allocate l1 stream table (%u bytes)\n",
2742                        l1size);
2743                return -ENOMEM;
2744        }
2745        cfg->strtab = strtab;
2746
2747        /* Configure strtab_base_cfg for 2 levels */
2748        reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2749        reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2750        reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2751        cfg->strtab_base_cfg = reg;
2752
2753        return arm_smmu_init_l1_strtab(smmu);
2754}
2755
2756static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2757{
2758        void *strtab;
2759        u64 reg;
2760        u32 size;
2761        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2762
2763        size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2764        strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2765                                     GFP_KERNEL);
2766        if (!strtab) {
2767                dev_err(smmu->dev,
2768                        "failed to allocate linear stream table (%u bytes)\n",
2769                        size);
2770                return -ENOMEM;
2771        }
2772        cfg->strtab = strtab;
2773        cfg->num_l1_ents = 1 << smmu->sid_bits;
2774
2775        /* Configure strtab_base_cfg for a linear table covering all SIDs */
2776        reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2777        reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2778        cfg->strtab_base_cfg = reg;
2779
2780        arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2781        return 0;
2782}
2783
2784static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2785{
2786        u64 reg;
2787        int ret;
2788
2789        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2790                ret = arm_smmu_init_strtab_2lvl(smmu);
2791        else
2792                ret = arm_smmu_init_strtab_linear(smmu);
2793
2794        if (ret)
2795                return ret;
2796
2797        /* Set the strtab base address */
2798        reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2799        reg |= STRTAB_BASE_RA;
2800        smmu->strtab_cfg.strtab_base = reg;
2801
2802        /* Allocate the first VMID for stage-2 bypass STEs */
2803        set_bit(0, smmu->vmid_map);
2804        return 0;
2805}
2806
2807static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2808{
2809        int ret;
2810
2811        ret = arm_smmu_init_queues(smmu);
2812        if (ret)
2813                return ret;
2814
2815        return arm_smmu_init_strtab(smmu);
2816}
2817
2818static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2819                                   unsigned int reg_off, unsigned int ack_off)
2820{
2821        u32 reg;
2822
2823        writel_relaxed(val, smmu->base + reg_off);
2824        return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2825                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2826}
2827
2828/* GBPA is "special" */
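    /*
     * Updating it means polling GBPA.Update until it is clear, writing the
     * new fields with Update set, then polling again until the SMMU clears
     * Update to acknowledge.
     */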
2829static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2830{
2831        int ret;
2832        u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2833
2834        ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2835                                         1, ARM_SMMU_POLL_TIMEOUT_US);
2836        if (ret)
2837                return ret;
2838
2839        reg &= ~clr;
2840        reg |= set;
2841        writel_relaxed(reg | GBPA_UPDATE, gbpa);
2842        ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2843                                         1, ARM_SMMU_POLL_TIMEOUT_US);
2844
2845        if (ret)
2846                dev_err(smmu->dev, "GBPA not responding to update\n");
2847        return ret;
2848}
2849
2850static void arm_smmu_free_msis(void *data)
2851{
2852        struct device *dev = data;
2853        platform_msi_domain_free_irqs(dev);
2854}
2855
2856static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2857{
2858        phys_addr_t doorbell;
2859        struct device *dev = msi_desc_to_dev(desc);
2860        struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2861        phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2862
2863        doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2864        doorbell &= MSI_CFG0_ADDR_MASK;
2865
2866        writeq_relaxed(doorbell, smmu->base + cfg[0]);
2867        writel_relaxed(msg->data, smmu->base + cfg[1]);
2868        writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2869}
2870
2871static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2872{
2873        struct msi_desc *desc;
2874        int ret, nvec = ARM_SMMU_MAX_MSIS;
2875        struct device *dev = smmu->dev;
2876
2877        /* Clear the MSI address regs */
2878        writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2879        writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2880
2881        if (smmu->features & ARM_SMMU_FEAT_PRI)
2882                writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2883        else
2884                nvec--;
2885
2886        if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2887                return;
2888
2889        if (!dev->msi_domain) {
2890                dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2891                return;
2892        }
2893
2894        /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2895        ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2896        if (ret) {
2897                dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2898                return;
2899        }
2900
2901        for_each_msi_entry(desc, dev) {
2902                switch (desc->platform.msi_index) {
2903                case EVTQ_MSI_INDEX:
2904                        smmu->evtq.q.irq = desc->irq;
2905                        break;
2906                case GERROR_MSI_INDEX:
2907                        smmu->gerr_irq = desc->irq;
2908                        break;
2909                case PRIQ_MSI_INDEX:
2910                        smmu->priq.q.irq = desc->irq;
2911                        break;
2912                default:        /* Unknown */
2913                        continue;
2914                }
2915        }
2916
2917        /* Add callback to free MSIs on teardown */
2918        devm_add_action(dev, arm_smmu_free_msis, dev);
2919}
2920
2921static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2922{
2923        int irq, ret;
2924
2925        arm_smmu_setup_msis(smmu);
2926
2927        /* Request interrupt lines */
2928        irq = smmu->evtq.q.irq;
2929        if (irq) {
2930                ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2931                                                arm_smmu_evtq_thread,
2932                                                IRQF_ONESHOT,
2933                                                "arm-smmu-v3-evtq", smmu);
2934                if (ret < 0)
2935                        dev_warn(smmu->dev, "failed to enable evtq irq\n");
2936        } else {
2937                dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2938        }
2939
2940        irq = smmu->gerr_irq;
2941        if (irq) {
2942                ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2943                                       0, "arm-smmu-v3-gerror", smmu);
2944                if (ret < 0)
2945                        dev_warn(smmu->dev, "failed to enable gerror irq\n");
2946        } else {
2947                dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2948        }
2949
2950        if (smmu->features & ARM_SMMU_FEAT_PRI) {
2951                irq = smmu->priq.q.irq;
2952                if (irq) {
2953                        ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2954                                                        arm_smmu_priq_thread,
2955                                                        IRQF_ONESHOT,
2956                                                        "arm-smmu-v3-priq",
2957                                                        smmu);
2958                        if (ret < 0)
2959                                dev_warn(smmu->dev,
2960                                         "failed to enable priq irq\n");
2961                } else {
2962                        dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2963                }
2964        }
2965}
2966
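/*
 * Disable interrupt generation, request either the single combined IRQ
 * (e.g. Cavium ThunderX2) or the per-queue and gerror IRQs, then re-enable
 * EVTQ and GERROR interrupt generation (plus PRIQ when PRI is supported).
 */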
2967static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2968{
2969        int ret, irq;
2970        u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2971
2972        /* Disable IRQs first */
2973        ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2974                                      ARM_SMMU_IRQ_CTRLACK);
2975        if (ret) {
2976                dev_err(smmu->dev, "failed to disable irqs\n");
2977                return ret;
2978        }
2979
2980        irq = smmu->combined_irq;
2981        if (irq) {
2982                /*
2983                 * Cavium ThunderX2 implementation doesn't support unique irq
2984                 * lines. Use a single irq line for all the SMMUv3 interrupts.
2985                 */
2986                ret = devm_request_threaded_irq(smmu->dev, irq,
2987                                        arm_smmu_combined_irq_handler,
2988                                        arm_smmu_combined_irq_thread,
2989                                        IRQF_ONESHOT,
2990                                        "arm-smmu-v3-combined-irq", smmu);
2991                if (ret < 0)
2992                        dev_warn(smmu->dev, "failed to enable combined irq\n");
2993        } else {
2994                arm_smmu_setup_unique_irqs(smmu);
            }
2995
2996        if (smmu->features & ARM_SMMU_FEAT_PRI)
2997                irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2998
2999        /* Enable interrupt generation on the SMMU */
3000        ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3001                                      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3002        if (ret)
3003                dev_warn(smmu->dev, "failed to enable irqs\n");
3004
3005        return 0;
3006}
3007
3008static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3009{
3010        int ret;
3011
3012        ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3013        if (ret)
3014                dev_err(smmu->dev, "failed to clear cr0\n");
3015
3016        return ret;
3017}
3018
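/*
 * Bring the SMMU up from a clean (disabled) state: program CR1/CR2 and the
 * stream table, enable the command queue, invalidate cached configuration
 * and stale TLB entries, enable the event and PRI queues, optionally enable
 * ATS checking, set up IRQs, and finally either enable translation (SMMUEN)
 * or configure GBPA for bypass.
 */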
3019static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3020{
3021        int ret;
3022        u32 reg, enables;
3023        struct arm_smmu_cmdq_ent cmd;
3024
3025        /* Clear CR0 and sync (disables SMMU and queue processing) */
3026        reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3027        if (reg & CR0_SMMUEN) {
3028                dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3029                WARN_ON(is_kdump_kernel() && !disable_bypass);
3030                arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3031        }
3032
3033        ret = arm_smmu_device_disable(smmu);
3034        if (ret)
3035                return ret;
3036
3037        /* CR1 (table and queue memory attributes) */
3038        reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3039              FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3040              FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3041              FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3042              FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3043              FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3044        writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3045
3046        /* CR2 (private TLB maintenance, bad-StreamID recording, EL2-E2H) */
3047        reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3048        writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3049
3050        /* Stream table */
3051        writeq_relaxed(smmu->strtab_cfg.strtab_base,
3052                       smmu->base + ARM_SMMU_STRTAB_BASE);
3053        writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3054                       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3055
3056        /* Command queue */
3057        writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3058        writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3059        writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3060
3061        enables = CR0_CMDQEN;
3062        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3063                                      ARM_SMMU_CR0ACK);
3064        if (ret) {
3065                dev_err(smmu->dev, "failed to enable command queue\n");
3066                return ret;
3067        }
3068
3069        /* Invalidate any cached configuration */
3070        cmd.opcode = CMDQ_OP_CFGI_ALL;
3071        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3072        arm_smmu_cmdq_issue_sync(smmu);
3073
3074        /* Invalidate any stale TLB entries */
3075        if (smmu->features & ARM_SMMU_FEAT_HYP) {
3076                cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3077                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3078        }
3079
3080        cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3081        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3082        arm_smmu_cmdq_issue_sync(smmu);
3083
3084        /* Event queue */
3085        writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3086        writel_relaxed(smmu->evtq.q.llq.prod,
3087                       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3088        writel_relaxed(smmu->evtq.q.llq.cons,
3089                       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3090
3091        enables |= CR0_EVTQEN;
3092        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3093                                      ARM_SMMU_CR0ACK);
3094        if (ret) {
3095                dev_err(smmu->dev, "failed to enable event queue\n");
3096                return ret;
3097        }
3098
3099        /* PRI queue */
3100        if (smmu->features & ARM_SMMU_FEAT_PRI) {
3101                writeq_relaxed(smmu->priq.q.q_base,
3102                               smmu->base + ARM_SMMU_PRIQ_BASE);
3103                writel_relaxed(smmu->priq.q.llq.prod,
3104                               arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3105                writel_relaxed(smmu->priq.q.llq.cons,
3106                               arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3107
3108                enables |= CR0_PRIQEN;
3109                ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3110                                              ARM_SMMU_CR0ACK);
3111                if (ret) {
3112                        dev_err(smmu->dev, "failed to enable PRI queue\n");
3113                        return ret;
3114                }
3115        }
3116
3117        if (smmu->features & ARM_SMMU_FEAT_ATS) {
3118                enables |= CR0_ATSCHK;
3119                ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3120                                              ARM_SMMU_CR0ACK);
3121                if (ret) {
3122                        dev_err(smmu->dev, "failed to enable ATS check\n");
3123                        return ret;
3124                }
3125        }
3126
3127        ret = arm_smmu_setup_irqs(smmu);
3128        if (ret) {
3129                dev_err(smmu->dev, "failed to setup irqs\n");
3130                return ret;
3131        }
3132
3133        if (is_kdump_kernel())
3134                enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3135
3136        /* Enable the SMMU interface, or ensure bypass */
3137        if (!bypass || disable_bypass) {
3138                enables |= CR0_SMMUEN;
3139        } else {
3140                ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3141                if (ret)
3142                        return ret;
3143        }
3144        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3145                                      ARM_SMMU_CR0ACK);
3146        if (ret) {
3147                dev_err(smmu->dev, "failed to enable SMMU interface\n");
3148                return ret;
3149        }
3150
3151        return 0;
3152}
3153
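/*
 * Probe the ID registers: IDR0 (features, translation stages, table
 * endianness, ASID/VMID widths), IDR1 (queue and StreamID/SubstreamID
 * sizes), IDR3 (range invalidation) and IDR5 (page sizes, VA and output
 * address sizes).
 */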
3154static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3155{
3156        u32 reg;
3157        bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3158
3159        /* IDR0 */
3160        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3161
3162        /* 2-level structures */
3163        if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3164                smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3165
3166        if (reg & IDR0_CD2L)
3167                smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3168
3169        /*
3170         * Translation table endianness.
3171         * We currently require the same endianness as the CPU, but this
3172         * could be changed later by adding a new IO_PGTABLE_QUIRK.
3173         */
3174        switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3175        case IDR0_TTENDIAN_MIXED:
3176                smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3177                break;
3178#ifdef __BIG_ENDIAN
3179        case IDR0_TTENDIAN_BE:
3180                smmu->features |= ARM_SMMU_FEAT_TT_BE;
3181                break;
3182#else
3183        case IDR0_TTENDIAN_LE:
3184                smmu->features |= ARM_SMMU_FEAT_TT_LE;
3185                break;
3186#endif
3187        default:
3188                dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3189                return -ENXIO;
3190        }
3191
3192        /* Boolean feature flags */
3193        if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3194                smmu->features |= ARM_SMMU_FEAT_PRI;
3195
3196        if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3197                smmu->features |= ARM_SMMU_FEAT_ATS;
3198
3199        if (reg & IDR0_SEV)
3200                smmu->features |= ARM_SMMU_FEAT_SEV;
3201
3202        if (reg & IDR0_MSI) {
3203                smmu->features |= ARM_SMMU_FEAT_MSI;
3204                if (coherent && !disable_msipolling)
3205                        smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3206        }
3207
3208        if (reg & IDR0_HYP)
3209                smmu->features |= ARM_SMMU_FEAT_HYP;
3210
3211        /*
3212         * The coherency feature as set by FW is used in preference to the ID
3213         * register, but warn on mismatch.
3214         */
3215        if (!!(reg & IDR0_COHACC) != coherent)
3216                dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3217                         coherent ? "true" : "false");
3218
3219        switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3220        case IDR0_STALL_MODEL_FORCE:
3221                smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3222                fallthrough;
3223        case IDR0_STALL_MODEL_STALL:
3224                smmu->features |= ARM_SMMU_FEAT_STALLS;
3225        }
3226
3227        if (reg & IDR0_S1P)
3228                smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3229
3230        if (reg & IDR0_S2P)
3231                smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3232
3233        if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3234                dev_err(smmu->dev, "no translation support!\n");
3235                return -ENXIO;
3236        }
3237
3238        /* We only support the AArch64 table format at present */
3239        switch (FIELD_GET(IDR0_TTF, reg)) {
3240        case IDR0_TTF_AARCH32_64:
3241                smmu->ias = 40;
3242                fallthrough;
3243        case IDR0_TTF_AARCH64:
3244                break;
3245        default:
3246                dev_err(smmu->dev, "AArch64 table format not supported!\n");
3247                return -ENXIO;
3248        }
3249
3250        /* ASID/VMID sizes */
3251        smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3252        smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3253
3254        /* IDR1 */
3255        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3256        if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3257                dev_err(smmu->dev, "embedded implementation not supported\n");
3258                return -ENXIO;
3259        }
3260
3261        /* Queue sizes, capped to ensure natural alignment */
3262        smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3263                                             FIELD_GET(IDR1_CMDQS, reg));
3264        if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3265                /*
3266                 * We don't support splitting up batches, so one batch of
3267                 * commands plus an extra sync needs to fit inside the command
3268                 * queue. There's also no way we can handle the weird alignment
3269                 * restrictions on the base pointer for a unit-length queue.
3270                 */
3271                dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3272                        CMDQ_BATCH_ENTRIES);
3273                return -ENXIO;
3274        }
3275
3276        smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3277                                             FIELD_GET(IDR1_EVTQS, reg));
3278        smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3279                                             FIELD_GET(IDR1_PRIQS, reg));
3280
3281        /* SID/SSID sizes */
3282        smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3283        smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3284
3285        /*
3286         * If the SMMU supports fewer bits than would fill a single L2 stream
3287         * table, use a linear table instead.
3288         */
3289        if (smmu->sid_bits <= STRTAB_SPLIT)
3290                smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3291
3292        /* IDR3 */
3293        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3294        if (FIELD_GET(IDR3_RIL, reg))
3295                smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3296
3297        /* IDR5 */
3298        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3299
3300        /* Maximum number of outstanding stalls */
3301        smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3302
3303        /* Page sizes */
3304        if (reg & IDR5_GRAN64K)
3305                smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3306        if (reg & IDR5_GRAN16K)
3307                smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3308        if (reg & IDR5_GRAN4K)
3309                smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3310
3311        /* Input address size */
3312        if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3313                smmu->features |= ARM_SMMU_FEAT_VAX;
3314
3315        /* Output address size */
3316        switch (FIELD_GET(IDR5_OAS, reg)) {
3317        case IDR5_OAS_32_BIT:
3318                smmu->oas = 32;
3319                break;
3320        case IDR5_OAS_36_BIT:
3321                smmu->oas = 36;
3322                break;
3323        case IDR5_OAS_40_BIT:
3324                smmu->oas = 40;
3325                break;
3326        case IDR5_OAS_42_BIT:
3327                smmu->oas = 42;
3328                break;
3329        case IDR5_OAS_44_BIT:
3330                smmu->oas = 44;
3331                break;
3332        case IDR5_OAS_52_BIT:
3333                smmu->oas = 52;
3334                smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3335                break;
3336        default:
3337                dev_info(smmu->dev,
3338                        "unknown output address size. Truncating to 48-bit\n");
3339                fallthrough;
3340        case IDR5_OAS_48_BIT:
3341                smmu->oas = 48;
3342        }
3343
3344        if (arm_smmu_ops.pgsize_bitmap == -1UL)
3345                arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3346        else
3347                arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3348
3349        /* Set the DMA mask for our table walker */
3350        if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3351                dev_warn(smmu->dev,
3352                         "failed to set DMA mask for table walker\n");
3353
3354        smmu->ias = max(smmu->ias, smmu->oas);
3355
3356        if (arm_smmu_sva_supported(smmu))
3357                smmu->features |= ARM_SMMU_FEAT_SVA;
3358
3359        dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3360                 smmu->ias, smmu->oas, smmu->features);
3361        return 0;
3362}
3363
3364#ifdef CONFIG_ACPI
3365static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3366{
3367        switch (model) {
3368        case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3369                smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3370                break;
3371        case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3372                smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3373                break;
3374        }
3375
3376        dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3377}
3378
3379static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3380                                      struct arm_smmu_device *smmu)
3381{
3382        struct acpi_iort_smmu_v3 *iort_smmu;
3383        struct device *dev = smmu->dev;
3384        struct acpi_iort_node *node;
3385
3386        node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3387
3388        /* Retrieve SMMUv3 specific data */
3389        iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3390
3391        acpi_smmu_get_options(iort_smmu->model, smmu);
3392
3393        if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3394                smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3395
3396        return 0;
3397}
3398#else
3399static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3400                                             struct arm_smmu_device *smmu)
3401{
3402        return -ENODEV;
3403}
3404#endif
3405
3406static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3407                                    struct arm_smmu_device *smmu)
3408{
3409        struct device *dev = &pdev->dev;
3410        u32 cells;
3411        int ret = -EINVAL;
3412
3413        if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3414                dev_err(dev, "missing #iommu-cells property\n");
3415        else if (cells != 1)
3416                dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3417        else
3418                ret = 0;
3419
3420        parse_driver_options(smmu);
3421
3422        if (of_dma_is_coherent(dev->of_node))
3423                smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3424
3425        return ret;
3426}
3427
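/*
 * Implementations with the PAGE0_REGS_ONLY quirk (e.g. Cavium CN99xx, whose
 * page 1 register space is broken) expose everything through a 64K page-0
 * region rather than the usual 128K covering register pages 0 and 1.
 */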
3428static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3429{
3430        if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3431                return SZ_64K;
3432        else
3433                return SZ_128K;
3434}
3435
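/*
 * Install @ops as the IOMMU ops for the PCI, AMBA and platform bus types
 * (or detach the driver when @ops is NULL), unwinding any buses already
 * claimed if a later bus_set_iommu() call fails.
 */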
3436static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3437{
3438        int err;
3439
3440#ifdef CONFIG_PCI
3441        if (pci_bus_type.iommu_ops != ops) {
3442                err = bus_set_iommu(&pci_bus_type, ops);
3443                if (err)
3444                        return err;
3445        }
3446#endif
3447#ifdef CONFIG_ARM_AMBA
3448        if (amba_bustype.iommu_ops != ops) {
3449                err = bus_set_iommu(&amba_bustype, ops);
3450                if (err)
3451                        goto err_reset_pci_ops;
3452        }
3453#endif
3454        if (platform_bus_type.iommu_ops != ops) {
3455                err = bus_set_iommu(&platform_bus_type, ops);
3456                if (err)
3457                        goto err_reset_amba_ops;
3458        }
3459
3460        return 0;
3461
3462err_reset_amba_ops:
3463#ifdef CONFIG_ARM_AMBA
3464        bus_set_iommu(&amba_bustype, NULL);
3465#endif
3466err_reset_pci_ops: __maybe_unused;
3467#ifdef CONFIG_PCI
3468        bus_set_iommu(&pci_bus_type, NULL);
3469#endif
3470        return err;
3471}
3472
3473static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3474                                      resource_size_t size)
3475{
3476        struct resource res = {
3477                .flags = IORESOURCE_MEM,
3478                .start = start,
3479                .end = start + size - 1,
3480        };
3481
3482        return devm_ioremap_resource(dev, &res);
3483}
3484
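/*
 * Probe path: parse the firmware description (DT or IORT), map the register
 * pages, pick up the wired IRQs, read the hardware ID registers, allocate
 * the queues and stream table, reset the device, and register it with the
 * IOMMU core and the relevant bus types.
 */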
3485static int arm_smmu_device_probe(struct platform_device *pdev)
3486{
3487        int irq, ret;
3488        struct resource *res;
3489        resource_size_t ioaddr;
3490        struct arm_smmu_device *smmu;
3491        struct device *dev = &pdev->dev;
3492        bool bypass;
3493
3494        smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3495        if (!smmu) {
3496                dev_err(dev, "failed to allocate arm_smmu_device\n");
3497                return -ENOMEM;
3498        }
3499        smmu->dev = dev;
3500
3501        if (dev->of_node) {
3502                ret = arm_smmu_device_dt_probe(pdev, smmu);
3503        } else {
3504                ret = arm_smmu_device_acpi_probe(pdev, smmu);
3505                if (ret == -ENODEV)
3506                        return ret;
3507        }
3508
3509        /* Set bypass mode according to firmware probing result */
3510        bypass = !!ret;
3511
3512        /* Base address */
3513        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
            if (!res)
                    return -EINVAL;
3514        if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3515                dev_err(dev, "MMIO region too small (%pr)\n", res);
3516                return -EINVAL;
3517        }
3518        ioaddr = res->start;
3519
3520        /*
3521         * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3522         * the PMCG registers which are reserved by the PMU driver.
3523         */
3524        smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3525        if (IS_ERR(smmu->base))
3526                return PTR_ERR(smmu->base);
3527
3528        if (arm_smmu_resource_size(smmu) > SZ_64K) {
3529                smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3530                                               ARM_SMMU_REG_SZ);
3531                if (IS_ERR(smmu->page1))
3532                        return PTR_ERR(smmu->page1);
3533        } else {
3534                smmu->page1 = smmu->base;
3535        }
3536
3537        /* Interrupt lines */
3538
3539        irq = platform_get_irq_byname_optional(pdev, "combined");
3540        if (irq > 0)
3541                smmu->combined_irq = irq;
3542        else {
3543                irq = platform_get_irq_byname_optional(pdev, "eventq");
3544                if (irq > 0)
3545                        smmu->evtq.q.irq = irq;
3546
3547                irq = platform_get_irq_byname_optional(pdev, "priq");
3548                if (irq > 0)
3549                        smmu->priq.q.irq = irq;
3550
3551                irq = platform_get_irq_byname_optional(pdev, "gerror");
3552                if (irq > 0)
3553                        smmu->gerr_irq = irq;
3554        }
3555        /* Probe the h/w */
3556        ret = arm_smmu_device_hw_probe(smmu);
3557        if (ret)
3558                return ret;
3559
3560        /* Initialise in-memory data structures */
3561        ret = arm_smmu_init_structures(smmu);
3562        if (ret)
3563                return ret;
3564
3565        /* Record our private device structure */
3566        platform_set_drvdata(pdev, smmu);
3567
3568        /* Reset the device */
3569        ret = arm_smmu_device_reset(smmu, bypass);
3570        if (ret)
3571                return ret;
3572
3573        /* And we're up. Go go go! */
3574        ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3575                                     "smmu3.%pa", &ioaddr);
3576        if (ret)
3577                return ret;
3578
3579        iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3580        iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3581
3582        ret = iommu_device_register(&smmu->iommu);
3583        if (ret) {
3584                dev_err(dev, "Failed to register iommu\n");
3585                return ret;
3586        }
3587
3588        return arm_smmu_set_bus_ops(&arm_smmu_ops);
3589}
3590
3591static int arm_smmu_device_remove(struct platform_device *pdev)
3592{
3593        struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3594
3595        arm_smmu_set_bus_ops(NULL);
3596        iommu_device_unregister(&smmu->iommu);
3597        iommu_device_sysfs_remove(&smmu->iommu);
3598        arm_smmu_device_disable(smmu);
3599
3600        return 0;
3601}
3602
3603static void arm_smmu_device_shutdown(struct platform_device *pdev)
3604{
3605        arm_smmu_device_remove(pdev);
3606}
3607
3608static const struct of_device_id arm_smmu_of_match[] = {
3609        { .compatible = "arm,smmu-v3", },
3610        { },
3611};
3612MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
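
/*
 * For illustration only: a minimal device-tree node matching the table above
 * (the unit address, reg values and interrupt specifiers are made up and
 * platform specific):
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <0 74 1>, <0 75 1>, <0 77 1>;
 *		interrupt-names = "eventq", "gerror", "priq";
 *		#iommu-cells = <1>;
 *	};
 *
 * A single "combined" interrupt may be supplied instead of the individual
 * queue interrupts, and "dma-coherent" advertises a coherent table walker.
 */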
3613
3614static struct platform_driver arm_smmu_driver = {
3615        .driver = {
3616                .name                   = "arm-smmu-v3",
3617                .of_match_table         = arm_smmu_of_match,
3618                .suppress_bind_attrs    = true,
3619        },
3620        .probe  = arm_smmu_device_probe,
3621        .remove = arm_smmu_device_remove,
3622        .shutdown = arm_smmu_device_shutdown,
3623};
3624module_platform_driver(arm_smmu_driver);
3625
3626MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3627MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3628MODULE_ALIAS("platform:arm-smmu-v3");
3629MODULE_LICENSE("GPL v2");
3630