linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * IOMMU API for ARM architected SMMUv3 implementations.
   4 *
   5 * Copyright (C) 2015 ARM Limited
   6 *
   7 * Author: Will Deacon <will.deacon@arm.com>
   8 *
   9 * This driver is powered by bad coffee and bombay mix.
  10 */
  11
  12#include <linux/acpi.h>
  13#include <linux/acpi_iort.h>
  14#include <linux/bitops.h>
  15#include <linux/crash_dump.h>
  16#include <linux/delay.h>
  17#include <linux/dma-iommu.h>
  18#include <linux/err.h>
  19#include <linux/interrupt.h>
  20#include <linux/io-pgtable.h>
  21#include <linux/iopoll.h>
  22#include <linux/module.h>
  23#include <linux/msi.h>
  24#include <linux/of.h>
  25#include <linux/of_address.h>
  26#include <linux/of_platform.h>
  27#include <linux/pci.h>
  28#include <linux/pci-ats.h>
  29#include <linux/platform_device.h>
  30
  31#include <linux/amba/bus.h>
  32
  33#include "arm-smmu-v3.h"
  34#include "../../iommu-sva-lib.h"
  35
  36static bool disable_bypass = true;
  37module_param(disable_bypass, bool, 0444);
  38MODULE_PARM_DESC(disable_bypass,
  39        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
  40
  41static bool disable_msipolling;
  42module_param(disable_msipolling, bool, 0444);
  43MODULE_PARM_DESC(disable_msipolling,
  44        "Disable MSI-based polling for CMD_SYNC completion.");
  45
  46enum arm_smmu_msi_index {
  47        EVTQ_MSI_INDEX,
  48        GERROR_MSI_INDEX,
  49        PRIQ_MSI_INDEX,
  50        ARM_SMMU_MAX_MSIS,
  51};
  52
  53static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
  54        [EVTQ_MSI_INDEX] = {
  55                ARM_SMMU_EVTQ_IRQ_CFG0,
  56                ARM_SMMU_EVTQ_IRQ_CFG1,
  57                ARM_SMMU_EVTQ_IRQ_CFG2,
  58        },
  59        [GERROR_MSI_INDEX] = {
  60                ARM_SMMU_GERROR_IRQ_CFG0,
  61                ARM_SMMU_GERROR_IRQ_CFG1,
  62                ARM_SMMU_GERROR_IRQ_CFG2,
  63        },
  64        [PRIQ_MSI_INDEX] = {
  65                ARM_SMMU_PRIQ_IRQ_CFG0,
  66                ARM_SMMU_PRIQ_IRQ_CFG1,
  67                ARM_SMMU_PRIQ_IRQ_CFG2,
  68        },
  69};
  70
  71struct arm_smmu_option_prop {
  72        u32 opt;
  73        const char *prop;
  74};
  75
  76DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
  77DEFINE_MUTEX(arm_smmu_asid_lock);
  78
  79/*
  80 * Special value used by SVA when a process dies, to quiesce a CD without
  81 * disabling it.
  82 */
  83struct arm_smmu_ctx_desc quiet_cd = { 0 };
  84
  85static struct arm_smmu_option_prop arm_smmu_options[] = {
  86        { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
  87        { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
  88        { 0, NULL},
  89};
  90
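/* Latch any quirk properties present in the device's DT node into smmu->options. */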
  91static void parse_driver_options(struct arm_smmu_device *smmu)
  92{
  93        int i = 0;
  94
  95        do {
  96                if (of_property_read_bool(smmu->dev->of_node,
  97                                                arm_smmu_options[i].prop)) {
  98                        smmu->options |= arm_smmu_options[i].opt;
  99                        dev_notice(smmu->dev, "option %s\n",
 100                                arm_smmu_options[i].prop);
 101                }
 102        } while (arm_smmu_options[++i].opt);
 103}
 104
 105/* Low-level queue manipulation functions */
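/*
 * prod and cons are kept as packed 32-bit values: the low max_n_shift
 * bits are the entry index (Q_IDX), the next bit is the wrap flag
 * (Q_WRP) and bit 31 is the overflow flag (Q_OVF); see the Q_* macros
 * in arm-smmu-v3.h. queue_has_space() works on this encoding: when the
 * wrap flags match, prod - cons entries are in use, so an 8-entry queue
 * (max_n_shift == 3) with prod == 5 and cons == 2 has 8 - (5 - 2) = 5
 * free slots; when the wrap flags differ, prod has wrapped and only
 * cons - prod slots remain.
 */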
 106static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
 107{
 108        u32 space, prod, cons;
 109
 110        prod = Q_IDX(q, q->prod);
 111        cons = Q_IDX(q, q->cons);
 112
 113        if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
 114                space = (1 << q->max_n_shift) - (prod - cons);
 115        else
 116                space = cons - prod;
 117
 118        return space >= n;
 119}
 120
 121static bool queue_full(struct arm_smmu_ll_queue *q)
 122{
 123        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 124               Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
 125}
 126
 127static bool queue_empty(struct arm_smmu_ll_queue *q)
 128{
 129        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 130               Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
 131}
 132
 133static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
 134{
 135        return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
 136                (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
 137               ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
 138                (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
 139}
 140
 141static void queue_sync_cons_out(struct arm_smmu_queue *q)
 142{
 143        /*
 144         * Ensure that all CPU accesses (reads and writes) to the queue
 145         * are complete before we update the cons pointer.
 146         */
 147        __iomb();
 148        writel_relaxed(q->llq.cons, q->cons_reg);
 149}
 150
 151static void queue_inc_cons(struct arm_smmu_ll_queue *q)
 152{
 153        u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
 154        q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
 155}
 156
 157static int queue_sync_prod_in(struct arm_smmu_queue *q)
 158{
 159        u32 prod;
 160        int ret = 0;
 161
 162        /*
 163         * We can't use the _relaxed() variant here, as we must prevent
 164         * speculative reads of the queue before we have determined that
 165         * prod has indeed moved.
 166         */
 167        prod = readl(q->prod_reg);
 168
 169        if (Q_OVF(prod) != Q_OVF(q->llq.prod))
 170                ret = -EOVERFLOW;
 171
 172        q->llq.prod = prod;
 173        return ret;
 174}
 175
 176static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
 177{
 178        u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
 179        return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
 180}
 181
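/*
 * Polling strategy for the helpers below: wait with WFE when the SMMU
 * can send events on queue updates (ARM_SMMU_FEAT_SEV), otherwise spin
 * with cpu_relax() for a while and then fall back to an exponentially
 * increasing udelay(), giving up after ARM_SMMU_POLL_TIMEOUT_US.
 */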
 182static void queue_poll_init(struct arm_smmu_device *smmu,
 183                            struct arm_smmu_queue_poll *qp)
 184{
 185        qp->delay = 1;
 186        qp->spin_cnt = 0;
 187        qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
 188        qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 189}
 190
 191static int queue_poll(struct arm_smmu_queue_poll *qp)
 192{
 193        if (ktime_compare(ktime_get(), qp->timeout) > 0)
 194                return -ETIMEDOUT;
 195
 196        if (qp->wfe) {
 197                wfe();
 198        } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
 199                cpu_relax();
 200        } else {
 201                udelay(qp->delay);
 202                qp->delay *= 2;
 203                qp->spin_cnt = 0;
 204        }
 205
 206        return 0;
 207}
 208
 209static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
 210{
 211        int i;
 212
 213        for (i = 0; i < n_dwords; ++i)
 214                *dst++ = cpu_to_le64(*src++);
 215}
 216
 217static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
 218{
 219        int i;
 220
 221        for (i = 0; i < n_dwords; ++i)
 222                *dst++ = le64_to_cpu(*src++);
 223}
 224
 225static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 226{
 227        if (queue_empty(&q->llq))
 228                return -EAGAIN;
 229
 230        queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
 231        queue_inc_cons(&q->llq);
 232        queue_sync_cons_out(q);
 233        return 0;
 234}
 235
 236/* High-level queue accessors */
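/*
 * Encode an arm_smmu_cmdq_ent into the two 64-bit command words expected
 * by the hardware. Unknown opcodes are rejected with -ENOENT and invalid
 * PRI responses with -EINVAL.
 */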
 237static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 238{
 239        memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
 240        cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
 241
 242        switch (ent->opcode) {
 243        case CMDQ_OP_TLBI_EL2_ALL:
 244        case CMDQ_OP_TLBI_NSNH_ALL:
 245                break;
 246        case CMDQ_OP_PREFETCH_CFG:
 247                cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
 248                break;
 249        case CMDQ_OP_CFGI_CD:
 250                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
 251                fallthrough;
 252        case CMDQ_OP_CFGI_STE:
 253                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
 254                cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
 255                break;
 256        case CMDQ_OP_CFGI_CD_ALL:
 257                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
 258                break;
 259        case CMDQ_OP_CFGI_ALL:
 260                /* Cover the entire SID range */
 261                cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
 262                break;
 263        case CMDQ_OP_TLBI_NH_VA:
 264                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 265                fallthrough;
 266        case CMDQ_OP_TLBI_EL2_VA:
 267                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
 268                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
 269                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 270                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 271                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
 272                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
 273                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
 274                break;
 275        case CMDQ_OP_TLBI_S2_IPA:
 276                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
 277                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
 278                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 279                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 280                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
 281                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
 282                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
 283                break;
 284        case CMDQ_OP_TLBI_NH_ASID:
 285                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 286                fallthrough;
 287        case CMDQ_OP_TLBI_S12_VMALL:
 288                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 289                break;
 290        case CMDQ_OP_TLBI_EL2_ASID:
 291                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 292                break;
 293        case CMDQ_OP_ATC_INV:
 294                cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
 295                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
 296                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
 297                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
 298                cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
 299                cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
 300                break;
 301        case CMDQ_OP_PRI_RESP:
 302                cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
 303                cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
 304                cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
 305                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
 306                switch (ent->pri.resp) {
 307                case PRI_RESP_DENY:
 308                case PRI_RESP_FAIL:
 309                case PRI_RESP_SUCC:
 310                        break;
 311                default:
 312                        return -EINVAL;
 313                }
 314                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
 315                break;
 316        case CMDQ_OP_RESUME:
 317                cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
 318                cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
 319                cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
 320                break;
 321        case CMDQ_OP_CMD_SYNC:
 322                if (ent->sync.msiaddr) {
 323                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
 324                        cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 325                } else {
 326                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
 327                }
 328                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
 329                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
 330                break;
 331        default:
 332                return -ENOENT;
 333        }
 334
 335        return 0;
 336}
 337
 338static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
 339{
 340        return &smmu->cmdq;
 341}
 342
 343static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 344                                         struct arm_smmu_queue *q, u32 prod)
 345{
 346        struct arm_smmu_cmdq_ent ent = {
 347                .opcode = CMDQ_OP_CMD_SYNC,
 348        };
 349
 350        /*
 351         * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
 352         * payload, so the write will zero the entire command on that platform.
 353         */
 354        if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
 355                ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
 356                                   q->ent_dwords * 8;
 357        }
 358
 359        arm_smmu_cmdq_build_cmd(cmd, &ent);
 360}
 361
 362static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 363                                     struct arm_smmu_queue *q)
 364{
 365        static const char * const cerror_str[] = {
 366                [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
 367                [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
 368                [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
 369                [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
 370        };
 371
 372        int i;
 373        u64 cmd[CMDQ_ENT_DWORDS];
 374        u32 cons = readl_relaxed(q->cons_reg);
 375        u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
 376        struct arm_smmu_cmdq_ent cmd_sync = {
 377                .opcode = CMDQ_OP_CMD_SYNC,
 378        };
 379
 380        dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
 381                idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
 382
 383        switch (idx) {
 384        case CMDQ_ERR_CERROR_ABT_IDX:
 385                dev_err(smmu->dev, "retrying command fetch\n");
 386                return;
 387        case CMDQ_ERR_CERROR_NONE_IDX:
 388                return;
 389        case CMDQ_ERR_CERROR_ATC_INV_IDX:
 390                /*
 391                 * ATC Invalidation Completion timeout. CONS is still pointing
 392                 * at the CMD_SYNC. Attempt to complete other pending commands
 393                 * by repeating the CMD_SYNC, though we might well end up back
 394                 * here since the ATC invalidation may still be pending.
 395                 */
 396                return;
 397        case CMDQ_ERR_CERROR_ILL_IDX:
 398        default:
 399                break;
 400        }
 401
 402        /*
 403         * We may have concurrent producers, so we need to be careful
 404         * not to touch any of the shadow cmdq state.
 405         */
 406        queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
 407        dev_err(smmu->dev, "skipping command in error state:\n");
 408        for (i = 0; i < ARRAY_SIZE(cmd); ++i)
 409                dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
 410
 411        /* Convert the erroneous command into a CMD_SYNC */
 412        if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
 413                dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
 414                return;
 415        }
 416
 417        queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 418}
 419
 420static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 421{
 422        __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
 423}
 424
 425/*
 426 * Command queue locking.
 427 * This is a form of bastardised rwlock with the following major changes:
 428 *
 429 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 430 *   Neither have barrier semantics, and instead provide only a control
 431 *   dependency.
 432 *
 433 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 434 *   fails if the caller appears to be the last lock holder (yes, this is
 435 *   racy). All successful UNLOCK routines have RELEASE semantics.
 436 */
 437static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
 438{
 439        int val;
 440
 441        /*
 442         * We can try to avoid the cmpxchg() loop by simply incrementing the
 443         * lock counter. When held in exclusive state, the lock counter is set
 444         * to INT_MIN so these increments won't hurt as the value will remain
 445         * negative.
 446         */
 447        if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
 448                return;
 449
 450        do {
 451                val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
 452        } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
 453}
 454
 455static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
 456{
 457        (void)atomic_dec_return_release(&cmdq->lock);
 458}
 459
 460static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
 461{
 462        if (atomic_read(&cmdq->lock) == 1)
 463                return false;
 464
 465        arm_smmu_cmdq_shared_unlock(cmdq);
 466        return true;
 467}
 468
 469#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)            \
 470({                                                                      \
 471        bool __ret;                                                     \
 472        local_irq_save(flags);                                          \
 473        __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
 474        if (!__ret)                                                     \
 475                local_irq_restore(flags);                               \
 476        __ret;                                                          \
 477})
 478
 479#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)          \
 480({                                                                      \
 481        atomic_set_release(&cmdq->lock, 0);                             \
 482        local_irq_restore(flags);                                       \
 483})
 484
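/*
 * Roughly how the primitives above are used by the insertion path below:
 * producers that queue a CMD_SYNC take the lock shared before marking
 * their slots valid and drop it with shared_tryunlock() once their sync
 * has completed, while the poll-until-not-full path takes it exclusively
 * (with interrupts disabled) so that it can safely refresh the shadow
 * cons pointer from the hardware register.
 */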
 485
 486/*
 487 * Command queue insertion.
 488 * This is made fiddly by our attempts to achieve some sort of scalability
 489 * since there is one queue shared amongst all of the CPUs in the system.  If
 490 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 491 * then you'll *love* this monstrosity.
 492 *
 493 * The basic idea is to split the queue up into ranges of commands that are
 494 * owned by a given CPU; the owner may not have written all of the commands
 495 * itself, but is responsible for advancing the hardware prod pointer when
 496 * the time comes. The algorithm is roughly:
 497 *
 498 *      1. Allocate some space in the queue. At this point we also discover
 499 *         whether the head of the queue is currently owned by another CPU,
 500 *         or whether we are the owner.
 501 *
 502 *      2. Write our commands into our allocated slots in the queue.
 503 *
 504 *      3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 505 *
 506 *      4. If we are an owner:
 507 *              a. Wait for the previous owner to finish.
 508 *              b. Mark the queue head as unowned, which tells us the range
 509 *                 that we are responsible for publishing.
 510 *              c. Wait for all commands in our owned range to become valid.
 511 *              d. Advance the hardware prod pointer.
 512 *              e. Tell the next owner we've finished.
 513 *
 514 *      5. If we are inserting a CMD_SYNC (we may or may not have been an
 515 *         owner), then we need to stick around until it has completed:
 516 *              a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 517 *                 to clear the first 4 bytes.
 518 *              b. Otherwise, we spin waiting for the hardware cons pointer to
 519 *                 advance past our command.
 520 *
 521 * The devil is in the details, particularly the use of locking for handling
 522 * SYNC completion and freeing up space in the queue before we think that it is
 523 * full.
 524 */
 525static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
 526                                               u32 sprod, u32 eprod, bool set)
 527{
 528        u32 swidx, sbidx, ewidx, ebidx;
 529        struct arm_smmu_ll_queue llq = {
 530                .max_n_shift    = cmdq->q.llq.max_n_shift,
 531                .prod           = sprod,
 532        };
 533
 534        ewidx = BIT_WORD(Q_IDX(&llq, eprod));
 535        ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
 536
 537        while (llq.prod != eprod) {
 538                unsigned long mask;
 539                atomic_long_t *ptr;
 540                u32 limit = BITS_PER_LONG;
 541
 542                swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
 543                sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
 544
 545                ptr = &cmdq->valid_map[swidx];
 546
 547                if ((swidx == ewidx) && (sbidx < ebidx))
 548                        limit = ebidx;
 549
 550                mask = GENMASK(limit - 1, sbidx);
 551
 552                /*
 553                 * The valid bit is the inverse of the wrap bit. This means
 554                 * that a zero-initialised queue is invalid and, after marking
 555                 * all entries as valid, they become invalid again when we
 556                 * wrap.
 557                 */
 558                if (set) {
 559                        atomic_long_xor(mask, ptr);
 560                } else { /* Poll */
 561                        unsigned long valid;
 562
 563                        valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
 564                        atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
 565                }
 566
 567                llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
 568        }
 569}
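
/*
 * Worked example of the valid-bit trick above: on the first lap of the
 * queue the wrap bit is 0, so producers XOR their mask in (0 -> 1) and
 * pollers wait for the bits to read as 1; once prod has wrapped, the
 * wrap bit is 1, producers toggle the bits back (1 -> 0) and pollers
 * wait for them to read as 0 again.
 */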
 570
 571/* Mark all entries in the range [sprod, eprod) as valid */
 572static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
 573                                        u32 sprod, u32 eprod)
 574{
 575        __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
 576}
 577
 578/* Wait for all entries in the range [sprod, eprod) to become valid */
 579static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
 580                                         u32 sprod, u32 eprod)
 581{
 582        __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
 583}
 584
 585/* Wait for the command queue to become non-full */
 586static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
 587                                             struct arm_smmu_ll_queue *llq)
 588{
 589        unsigned long flags;
 590        struct arm_smmu_queue_poll qp;
 591        struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 592        int ret = 0;
 593
 594        /*
 595         * Try to update our copy of cons by grabbing exclusive cmdq access. If
 596         * that fails, spin until somebody else updates it for us.
 597         */
 598        if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 599                WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 600                arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 601                llq->val = READ_ONCE(cmdq->q.llq.val);
 602                return 0;
 603        }
 604
 605        queue_poll_init(smmu, &qp);
 606        do {
 607                llq->val = READ_ONCE(cmdq->q.llq.val);
 608                if (!queue_full(llq))
 609                        break;
 610
 611                ret = queue_poll(&qp);
 612        } while (!ret);
 613
 614        return ret;
 615}
 616
 617/*
 618 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 619 * Must be called with the cmdq lock held in some capacity.
 620 */
 621static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
 622                                          struct arm_smmu_ll_queue *llq)
 623{
 624        int ret = 0;
 625        struct arm_smmu_queue_poll qp;
 626        struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 627        u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
 628
 629        queue_poll_init(smmu, &qp);
 630
 631        /*
 632         * The MSI won't generate an event, since it's being written back
 633         * into the command queue.
 634         */
 635        qp.wfe = false;
 636        smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
 637        llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
 638        return ret;
 639}
 640
 641/*
 642 * Wait until the SMMU cons index passes llq->prod.
 643 * Must be called with the cmdq lock held in some capacity.
 644 */
 645static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 646                                               struct arm_smmu_ll_queue *llq)
 647{
 648        struct arm_smmu_queue_poll qp;
 649        struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 650        u32 prod = llq->prod;
 651        int ret = 0;
 652
 653        queue_poll_init(smmu, &qp);
 654        llq->val = READ_ONCE(cmdq->q.llq.val);
 655        do {
 656                if (queue_consumed(llq, prod))
 657                        break;
 658
 659                ret = queue_poll(&qp);
 660
 661                /*
 662                 * This needs to be a readl() so that our subsequent call
 663                 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
 664                 *
 665                 * Specifically, we need to ensure that we observe all
 666                 * shared_lock()s by other CMD_SYNCs that share our owner,
 667                 * so that a failing call to tryunlock() means that we're
 668                 * the last one out and therefore we can safely advance
 669                 * cmdq->q.llq.cons. Roughly speaking:
 670                 *
 671                 * CPU 0                CPU1                    CPU2 (us)
 672                 *
 673                 * if (sync)
 674                 *      shared_lock();
 675                 *
 676                 * dma_wmb();
 677                 * set_valid_map();
 678                 *
 679                 *                      if (owner) {
 680                 *                              poll_valid_map();
 681                 *                              <control dependency>
 682                 *                              writel(prod_reg);
 683                 *
 684                 *                                              readl(cons_reg);
 685                 *                                              tryunlock();
 686                 *
 687                 * Requires us to see CPU 0's shared_lock() acquisition.
 688                 */
 689                llq->cons = readl(cmdq->q.cons_reg);
 690        } while (!ret);
 691
 692        return ret;
 693}
 694
 695static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 696                                         struct arm_smmu_ll_queue *llq)
 697{
 698        if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
 699                return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
 700
 701        return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
 702}
 703
 704static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
 705                                        u32 prod, int n)
 706{
 707        int i;
 708        struct arm_smmu_ll_queue llq = {
 709                .max_n_shift    = cmdq->q.llq.max_n_shift,
 710                .prod           = prod,
 711        };
 712
 713        for (i = 0; i < n; ++i) {
 714                u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
 715
 716                prod = queue_inc_prod_n(&llq, i);
 717                queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
 718        }
 719}
 720
 721/*
 722 * This is the actual insertion function, and provides the following
 723 * ordering guarantees to callers:
 724 *
 725 * - There is a dma_wmb() before publishing any commands to the queue.
 726 *   This can be relied upon to order prior writes to data structures
 727 *   in memory (such as a CD or an STE) before the command.
 728 *
 729 * - On completion of a CMD_SYNC, there is a control dependency.
 730 *   This can be relied upon to order subsequent writes to memory (e.g.
 731 *   freeing an IOVA) after completion of the CMD_SYNC.
 732 *
 733 * - Command insertion is totally ordered, so if two CPUs each race to
 734 *   insert their own list of commands then all of the commands from one
 735 *   CPU will appear before any of the commands from the other CPU.
 736 */
 737static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 738                                       u64 *cmds, int n, bool sync)
 739{
 740        u64 cmd_sync[CMDQ_ENT_DWORDS];
 741        u32 prod;
 742        unsigned long flags;
 743        bool owner;
 744        struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 745        struct arm_smmu_ll_queue llq, head;
 746        int ret = 0;
 747
 748        llq.max_n_shift = cmdq->q.llq.max_n_shift;
 749
 750        /* 1. Allocate some space in the queue */
 751        local_irq_save(flags);
 752        llq.val = READ_ONCE(cmdq->q.llq.val);
 753        do {
 754                u64 old;
 755
 756                while (!queue_has_space(&llq, n + sync)) {
 757                        local_irq_restore(flags);
 758                        if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
 759                                dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
 760                        local_irq_save(flags);
 761                }
 762
 763                head.cons = llq.cons;
 764                head.prod = queue_inc_prod_n(&llq, n + sync) |
 765                                             CMDQ_PROD_OWNED_FLAG;
 766
 767                old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
 768                if (old == llq.val)
 769                        break;
 770
 771                llq.val = old;
 772        } while (1);
 773        owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
 774        head.prod &= ~CMDQ_PROD_OWNED_FLAG;
 775        llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
 776
 777        /*
 778         * 2. Write our commands into the queue
 779         * Dependency ordering from the cmpxchg() loop above.
 780         */
 781        arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
 782        if (sync) {
 783                prod = queue_inc_prod_n(&llq, n);
 784                arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
 785                queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
 786
 787                /*
 788                 * In order to determine completion of our CMD_SYNC, we must
 789                 * ensure that the queue can't wrap twice without us noticing.
 790                 * We achieve that by taking the cmdq lock as shared before
 791                 * marking our slot as valid.
 792                 */
 793                arm_smmu_cmdq_shared_lock(cmdq);
 794        }
 795
 796        /* 3. Mark our slots as valid, ensuring commands are visible first */
 797        dma_wmb();
 798        arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
 799
 800        /* 4. If we are the owner, take control of the SMMU hardware */
 801        if (owner) {
 802                /* a. Wait for previous owner to finish */
 803                atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
 804
 805                /* b. Stop gathering work by clearing the owned flag */
 806                prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
 807                                                   &cmdq->q.llq.atomic.prod);
 808                prod &= ~CMDQ_PROD_OWNED_FLAG;
 809
 810                /*
 811                 * c. Wait for any gathered work to be written to the queue.
 812                 * Note that we read our own entries so that we have the control
 813                 * dependency required by (d).
 814                 */
 815                arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
 816
 817                /*
 818                 * d. Advance the hardware prod pointer
 819                 * Control dependency ordering from the entries becoming valid.
 820                 */
 821                writel_relaxed(prod, cmdq->q.prod_reg);
 822
 823                /*
 824                 * e. Tell the next owner we're done
 825                 * Make sure we've updated the hardware first, so that we don't
 826                 * race to update prod and potentially move it backwards.
 827                 */
 828                atomic_set_release(&cmdq->owner_prod, prod);
 829        }
 830
 831        /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
 832        if (sync) {
 833                llq.prod = queue_inc_prod_n(&llq, n);
 834                ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
 835                if (ret) {
 836                        dev_err_ratelimited(smmu->dev,
 837                                            "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
 838                                            llq.prod,
 839                                            readl_relaxed(cmdq->q.prod_reg),
 840                                            readl_relaxed(cmdq->q.cons_reg));
 841                }
 842
 843                /*
 844                 * Try to unlock the cmdq lock. This will fail if we're the last
 845                 * reader, in which case we can safely update cmdq->q.llq.cons
 846                 */
 847                if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 848                        WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 849                        arm_smmu_cmdq_shared_unlock(cmdq);
 850                }
 851        }
 852
 853        local_irq_restore(flags);
 854        return ret;
 855}
 856
 857static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 858                                     struct arm_smmu_cmdq_ent *ent,
 859                                     bool sync)
 860{
 861        u64 cmd[CMDQ_ENT_DWORDS];
 862
 863        if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
 864                dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
 865                         ent->opcode);
 866                return -EINVAL;
 867        }
 868
 869        return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
 870}
 871
 872static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 873                                   struct arm_smmu_cmdq_ent *ent)
 874{
 875        return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
 876}
 877
 878static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
 879                                             struct arm_smmu_cmdq_ent *ent)
 880{
 881        return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
 882}
 883
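/*
 * Batching helpers: arm_smmu_cmdq_batch_add() accumulates commands in a
 * caller-provided buffer and flushes it (without a sync) whenever it
 * reaches CMDQ_BATCH_ENTRIES, while arm_smmu_cmdq_batch_submit() issues
 * whatever is left together with a trailing CMD_SYNC.
 */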
 884static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
 885                                    struct arm_smmu_cmdq_batch *cmds,
 886                                    struct arm_smmu_cmdq_ent *cmd)
 887{
 888        if (cmds->num == CMDQ_BATCH_ENTRIES) {
 889                arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
 890                cmds->num = 0;
 891        }
 892        arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
 893        cmds->num++;
 894}
 895
 896static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
 897                                      struct arm_smmu_cmdq_batch *cmds)
 898{
 899        return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
 900}
 901
 902static int arm_smmu_page_response(struct device *dev,
 903                                  struct iommu_fault_event *unused,
 904                                  struct iommu_page_response *resp)
 905{
 906        struct arm_smmu_cmdq_ent cmd = {0};
 907        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
 908        int sid = master->streams[0].id;
 909
 910        if (master->stall_enabled) {
 911                cmd.opcode              = CMDQ_OP_RESUME;
 912                cmd.resume.sid          = sid;
 913                cmd.resume.stag         = resp->grpid;
 914                switch (resp->code) {
 915                case IOMMU_PAGE_RESP_INVALID:
 916                case IOMMU_PAGE_RESP_FAILURE:
 917                        cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
 918                        break;
 919                case IOMMU_PAGE_RESP_SUCCESS:
 920                        cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
 921                        break;
 922                default:
 923                        return -EINVAL;
 924                }
 925        } else {
 926                return -ENODEV;
 927        }
 928
 929        arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
 930        /*
 931         * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
 932         * RESUME consumption guarantees that the stalled transaction will be
 933         * terminated... at some point in the future. PRI_RESP is fire and
 934         * forget.
 935         */
 936
 937        return 0;
 938}
 939
 940/* Context descriptor manipulation functions */
 941void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 942{
 943        struct arm_smmu_cmdq_ent cmd = {
 944                .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
 945                        CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
 946                .tlbi.asid = asid,
 947        };
 948
 949        arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 950}
 951
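/*
 * Invalidate any cached copies of the context descriptor for @ssid on
 * every master attached to the domain, by batching a CFGI_CD command per
 * stream ID and finishing with a CMD_SYNC.
 */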
 952static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 953                             int ssid, bool leaf)
 954{
 955        size_t i;
 956        unsigned long flags;
 957        struct arm_smmu_master *master;
 958        struct arm_smmu_cmdq_batch cmds;
 959        struct arm_smmu_device *smmu = smmu_domain->smmu;
 960        struct arm_smmu_cmdq_ent cmd = {
 961                .opcode = CMDQ_OP_CFGI_CD,
 962                .cfgi   = {
 963                        .ssid   = ssid,
 964                        .leaf   = leaf,
 965                },
 966        };
 967
 968        cmds.num = 0;
 969
 970        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
 971        list_for_each_entry(master, &smmu_domain->devices, domain_head) {
 972                for (i = 0; i < master->num_streams; i++) {
 973                        cmd.cfgi.sid = master->streams[i].id;
 974                        arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 975                }
 976        }
 977        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 978
 979        arm_smmu_cmdq_batch_submit(smmu, &cmds);
 980}
 981
 982static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
 983                                        struct arm_smmu_l1_ctx_desc *l1_desc)
 984{
 985        size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
 986
 987        l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
 988                                             &l1_desc->l2ptr_dma, GFP_KERNEL);
 989        if (!l1_desc->l2ptr) {
 990                dev_warn(smmu->dev,
 991                         "failed to allocate context descriptor table\n");
 992                return -ENOMEM;
 993        }
 994        return 0;
 995}
 996
 997static void arm_smmu_write_cd_l1_desc(__le64 *dst,
 998                                      struct arm_smmu_l1_ctx_desc *l1_desc)
 999{
1000        u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1001                  CTXDESC_L1_DESC_V;
1002
1003        /* See comment in arm_smmu_write_ctx_desc() */
1004        WRITE_ONCE(*dst, cpu_to_le64(val));
1005}
1006
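/*
 * Return a pointer to the context descriptor for @ssid, allocating and
 * installing the second-level table on demand when the two-level CD
 * format is in use. Returns NULL if the leaf table cannot be allocated.
 */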
1007static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1008                                   u32 ssid)
1009{
1010        __le64 *l1ptr;
1011        unsigned int idx;
1012        struct arm_smmu_l1_ctx_desc *l1_desc;
1013        struct arm_smmu_device *smmu = smmu_domain->smmu;
1014        struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1015
1016        if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1017                return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1018
1019        idx = ssid >> CTXDESC_SPLIT;
1020        l1_desc = &cdcfg->l1_desc[idx];
1021        if (!l1_desc->l2ptr) {
1022                if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1023                        return NULL;
1024
1025                l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1026                arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1027                /* An invalid L1CD can be cached */
1028                arm_smmu_sync_cd(smmu_domain, ssid, false);
1029        }
1030        idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1031        return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1032}
1033
1034int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1035                            struct arm_smmu_ctx_desc *cd)
1036{
1037        /*
1038         * This function handles the following cases:
1039         *
1040         * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1041         * (2) Install a secondary CD, for SID+SSID traffic.
1042         * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1043         *     CD, then invalidate the old entry and mappings.
1044         * (4) Quiesce the context without clearing the valid bit. Disable
1045         *     translation, and ignore any translation fault.
1046         * (5) Remove a secondary CD.
1047         */
1048        u64 val;
1049        bool cd_live;
1050        __le64 *cdptr;
1051
1052        if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1053                return -E2BIG;
1054
1055        cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1056        if (!cdptr)
1057                return -ENOMEM;
1058
1059        val = le64_to_cpu(cdptr[0]);
1060        cd_live = !!(val & CTXDESC_CD_0_V);
1061
1062        if (!cd) { /* (5) */
1063                val = 0;
1064        } else if (cd == &quiet_cd) { /* (4) */
1065                val |= CTXDESC_CD_0_TCR_EPD0;
1066        } else if (cd_live) { /* (3) */
1067                val &= ~CTXDESC_CD_0_ASID;
1068                val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1069                /*
1070                 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1071                 * this substream's traffic
1072                 */
1073        } else { /* (1) and (2) */
1074                cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1075                cdptr[2] = 0;
1076                cdptr[3] = cpu_to_le64(cd->mair);
1077
1078                /*
1079                 * STE is live, and the SMMU might read dwords of this CD in any
1080                 * order. Ensure that it observes valid values before reading
1081                 * V=1.
1082                 */
1083                arm_smmu_sync_cd(smmu_domain, ssid, true);
1084
1085                val = cd->tcr |
1086#ifdef __BIG_ENDIAN
1087                        CTXDESC_CD_0_ENDI |
1088#endif
1089                        CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1090                        (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1091                        CTXDESC_CD_0_AA64 |
1092                        FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1093                        CTXDESC_CD_0_V;
1094
1095                if (smmu_domain->stall_enabled)
1096                        val |= CTXDESC_CD_0_S;
1097        }
1098
1099        /*
1100         * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1101         * "Configuration structures and configuration invalidation completion"
1102         *
1103         *   The size of single-copy atomic reads made by the SMMU is
1104         *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1105         *   field within an aligned 64-bit span of a structure can be altered
1106         *   without first making the structure invalid.
1107         */
1108        WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1109        arm_smmu_sync_cd(smmu_domain, ssid, true);
1110        return 0;
1111}
1112
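/*
 * Allocate the context descriptor table for a stage-1 domain: a linear
 * table if the SMMU lacks 2-level CD support or the number of contexts
 * (1 << s1cdmax) fits in a single leaf, otherwise a first-level table
 * whose leaves are allocated lazily by arm_smmu_get_cd_ptr().
 */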
1113static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1114{
1115        int ret;
1116        size_t l1size;
1117        size_t max_contexts;
1118        struct arm_smmu_device *smmu = smmu_domain->smmu;
1119        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1120        struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1121
1122        max_contexts = 1 << cfg->s1cdmax;
1123
1124        if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1125            max_contexts <= CTXDESC_L2_ENTRIES) {
1126                cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1127                cdcfg->num_l1_ents = max_contexts;
1128
1129                l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1130        } else {
1131                cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1132                cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1133                                                  CTXDESC_L2_ENTRIES);
1134
1135                cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1136                                              sizeof(*cdcfg->l1_desc),
1137                                              GFP_KERNEL);
1138                if (!cdcfg->l1_desc)
1139                        return -ENOMEM;
1140
1141                l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1142        }
1143
1144        cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1145                                           GFP_KERNEL);
1146        if (!cdcfg->cdtab) {
1147                dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1148                ret = -ENOMEM;
1149                goto err_free_l1;
1150        }
1151
1152        return 0;
1153
1154err_free_l1:
1155        if (cdcfg->l1_desc) {
1156                devm_kfree(smmu->dev, cdcfg->l1_desc);
1157                cdcfg->l1_desc = NULL;
1158        }
1159        return ret;
1160}
1161
1162static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1163{
1164        int i;
1165        size_t size, l1size;
1166        struct arm_smmu_device *smmu = smmu_domain->smmu;
1167        struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1168
1169        if (cdcfg->l1_desc) {
1170                size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1171
1172                for (i = 0; i < cdcfg->num_l1_ents; i++) {
1173                        if (!cdcfg->l1_desc[i].l2ptr)
1174                                continue;
1175
1176                        dmam_free_coherent(smmu->dev, size,
1177                                           cdcfg->l1_desc[i].l2ptr,
1178                                           cdcfg->l1_desc[i].l2ptr_dma);
1179                }
1180                devm_kfree(smmu->dev, cdcfg->l1_desc);
1181                cdcfg->l1_desc = NULL;
1182
1183                l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1184        } else {
1185                l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1186        }
1187
1188        dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1189        cdcfg->cdtab_dma = 0;
1190        cdcfg->cdtab = NULL;
1191}
1192
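/*
 * Drop a reference on @cd's ASID (ASID 0 is never tracked). Returns true
 * if this was the last reference, in which case the ASID has been erased
 * from the global xarray and may be reallocated.
 */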
1193bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1194{
1195        bool free;
1196        struct arm_smmu_ctx_desc *old_cd;
1197
1198        if (!cd->asid)
1199                return false;
1200
1201        free = refcount_dec_and_test(&cd->refs);
1202        if (free) {
1203                old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1204                WARN_ON(old_cd != cd);
1205        }
1206        return free;
1207}
1208
1209/* Stream table manipulation functions */
1210static void
1211arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1212{
1213        u64 val = 0;
1214
1215        val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1216        val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1217
1218        /* See comment in arm_smmu_write_ctx_desc() */
1219        WRITE_ONCE(*dst, cpu_to_le64(val));
1220}
1221
1222static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1223{
1224        struct arm_smmu_cmdq_ent cmd = {
1225                .opcode = CMDQ_OP_CFGI_STE,
1226                .cfgi   = {
1227                        .sid    = sid,
1228                        .leaf   = true,
1229                },
1230        };
1231
1232        arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1233}
1234
1235static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1236                                      __le64 *dst)
1237{
1238        /*
1239         * This is hideously complicated, but we only really care about
1240         * three cases at the moment:
1241         *
1242         * 1. Invalid (all zero) -> bypass/fault (init)
1243         * 2. Bypass/fault -> translation/bypass (attach)
1244         * 3. Translation/bypass -> bypass/fault (detach)
1245         *
1246         * Given that we can't update the STE atomically and the SMMU
1247         * doesn't read the thing in a defined order, that leaves us
1248         * with the following maintenance requirements:
1249         *
1250         * 1. Update Config, return (init time STEs aren't live)
1251         * 2. Write everything apart from dword 0, sync, write dword 0, sync
1252         * 3. Update Config, sync
1253         */
1254        u64 val = le64_to_cpu(dst[0]);
1255        bool ste_live = false;
1256        struct arm_smmu_device *smmu = NULL;
1257        struct arm_smmu_s1_cfg *s1_cfg = NULL;
1258        struct arm_smmu_s2_cfg *s2_cfg = NULL;
1259        struct arm_smmu_domain *smmu_domain = NULL;
1260        struct arm_smmu_cmdq_ent prefetch_cmd = {
1261                .opcode         = CMDQ_OP_PREFETCH_CFG,
1262                .prefetch       = {
1263                        .sid    = sid,
1264                },
1265        };
1266
1267        if (master) {
1268                smmu_domain = master->domain;
1269                smmu = master->smmu;
1270        }
1271
1272        if (smmu_domain) {
1273                switch (smmu_domain->stage) {
1274                case ARM_SMMU_DOMAIN_S1:
1275                        s1_cfg = &smmu_domain->s1_cfg;
1276                        break;
1277                case ARM_SMMU_DOMAIN_S2:
1278                case ARM_SMMU_DOMAIN_NESTED:
1279                        s2_cfg = &smmu_domain->s2_cfg;
1280                        break;
1281                default:
1282                        break;
1283                }
1284        }
1285
1286        if (val & STRTAB_STE_0_V) {
1287                switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1288                case STRTAB_STE_0_CFG_BYPASS:
1289                        break;
1290                case STRTAB_STE_0_CFG_S1_TRANS:
1291                case STRTAB_STE_0_CFG_S2_TRANS:
1292                        ste_live = true;
1293                        break;
1294                case STRTAB_STE_0_CFG_ABORT:
1295                        BUG_ON(!disable_bypass);
1296                        break;
1297                default:
1298                        BUG(); /* STE corruption */
1299                }
1300        }
1301
1302        /* Nuke the existing STE_0 value, as we're going to rewrite it */
1303        val = STRTAB_STE_0_V;
1304
1305        /* Bypass/fault */
1306        if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1307                if (!smmu_domain && disable_bypass)
1308                        val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1309                else
1310                        val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1311
1312                dst[0] = cpu_to_le64(val);
1313                dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1314                                                STRTAB_STE_1_SHCFG_INCOMING));
1315                dst[2] = 0; /* Nuke the VMID */
1316                /*
1317                 * The SMMU can perform negative caching, so we must sync
1318                 * the STE regardless of whether the old value was live.
1319                 */
1320                if (smmu)
1321                        arm_smmu_sync_ste_for_sid(smmu, sid);
1322                return;
1323        }
1324
1325        if (s1_cfg) {
1326                u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1327                        STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1328
1329                BUG_ON(ste_live);
1330                dst[1] = cpu_to_le64(
1331                         FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1332                         FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1333                         FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1334                         FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1335                         FIELD_PREP(STRTAB_STE_1_STRW, strw));
1336
1337                if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1338                    !master->stall_enabled)
1339                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1340
1341                val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1342                        FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1343                        FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1344                        FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1345        }
1346
1347        if (s2_cfg) {
1348                BUG_ON(ste_live);
1349                dst[2] = cpu_to_le64(
1350                         FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1351                         FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1352#ifdef __BIG_ENDIAN
1353                         STRTAB_STE_2_S2ENDI |
1354#endif
1355                         STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1356                         STRTAB_STE_2_S2R);
1357
1358                dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1359
1360                val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1361        }
1362
1363        if (master->ats_enabled)
1364                dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1365                                                 STRTAB_STE_1_EATS_TRANS));
1366
1367        arm_smmu_sync_ste_for_sid(smmu, sid);
1368        /* See comment in arm_smmu_write_ctx_desc() */
1369        WRITE_ONCE(dst[0], cpu_to_le64(val));
1370        arm_smmu_sync_ste_for_sid(smmu, sid);
1371
1372        /* It's likely that we'll want to use the new STE soon */
1373        if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1374                arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1375}
1376
1377static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1378{
1379        unsigned int i;
1380
1381        for (i = 0; i < nent; ++i) {
1382                arm_smmu_write_strtab_ent(NULL, -1, strtab);
1383                strtab += STRTAB_STE_DWORDS;
1384        }
1385}
1386
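/*
 * Lazily allocate the second-level stream table covering @sid, point all
 * of its STEs at bypass or abort (depending on disable_bypass) and hook
 * it into the first-level descriptor. Does nothing if the table already
 * exists.
 */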
1387static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1388{
1389        size_t size;
1390        void *strtab;
1391        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1392        struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1393
1394        if (desc->l2ptr)
1395                return 0;
1396
1397        size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1398        strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1399
1400        desc->span = STRTAB_SPLIT + 1;
1401        desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1402                                          GFP_KERNEL);
1403        if (!desc->l2ptr) {
1404                dev_err(smmu->dev,
1405                        "failed to allocate l2 stream table for SID %u\n",
1406                        sid);
1407                return -ENOMEM;
1408        }
1409
1410        arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1411        arm_smmu_write_strtab_l1_desc(strtab, desc);
1412        return 0;
1413}
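/*
 * Sizing example for the L2 allocation above, assuming the usual values of
 * STRTAB_SPLIT = 8 and STRTAB_STE_DWORDS = 8 (64-byte STEs):
 *
 *	size = 1 << (8 + ilog2(8) + 3)
 *	     = 1 << (8 + 3 + 3)
 *	     = 16 KiB
 *
 * i.e. one L2 table holds 2^STRTAB_SPLIT = 256 STEs of 64 bytes each,
 * covering the 256 consecutive SIDs that share an L1 descriptor, and
 * arm_smmu_init_bypass_stes() puts every entry into the default
 * (bypass or abort) state until a master claims it.
 */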
1414
1415static struct arm_smmu_master *
1416arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1417{
1418        struct rb_node *node;
1419        struct arm_smmu_stream *stream;
1420
1421        lockdep_assert_held(&smmu->streams_mutex);
1422
1423        node = smmu->streams.rb_node;
1424        while (node) {
1425                stream = rb_entry(node, struct arm_smmu_stream, node);
1426                if (stream->id < sid)
1427                        node = node->rb_right;
1428                else if (stream->id > sid)
1429                        node = node->rb_left;
1430                else
1431                        return stream->master;
1432        }
1433
1434        return NULL;
1435}
1436
1437/* IRQ and event handlers */
1438static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1439{
1440        int ret;
1441        u32 reason;
1442        u32 perm = 0;
1443        struct arm_smmu_master *master;
1444        bool ssid_valid = evt[0] & EVTQ_0_SSV;
1445        u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1446        struct iommu_fault_event fault_evt = { };
1447        struct iommu_fault *flt = &fault_evt.fault;
1448
1449        switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1450        case EVT_ID_TRANSLATION_FAULT:
1451                reason = IOMMU_FAULT_REASON_PTE_FETCH;
1452                break;
1453        case EVT_ID_ADDR_SIZE_FAULT:
1454                reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1455                break;
1456        case EVT_ID_ACCESS_FAULT:
1457                reason = IOMMU_FAULT_REASON_ACCESS;
1458                break;
1459        case EVT_ID_PERMISSION_FAULT:
1460                reason = IOMMU_FAULT_REASON_PERMISSION;
1461                break;
1462        default:
1463                return -EOPNOTSUPP;
1464        }
1465
1466        /* Stage-2 is always pinned at the moment */
1467        if (evt[1] & EVTQ_1_S2)
1468                return -EFAULT;
1469
1470        if (evt[1] & EVTQ_1_RnW)
1471                perm |= IOMMU_FAULT_PERM_READ;
1472        else
1473                perm |= IOMMU_FAULT_PERM_WRITE;
1474
1475        if (evt[1] & EVTQ_1_InD)
1476                perm |= IOMMU_FAULT_PERM_EXEC;
1477
1478        if (evt[1] & EVTQ_1_PnU)
1479                perm |= IOMMU_FAULT_PERM_PRIV;
1480
1481        if (evt[1] & EVTQ_1_STALL) {
1482                flt->type = IOMMU_FAULT_PAGE_REQ;
1483                flt->prm = (struct iommu_fault_page_request) {
1484                        .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1485                        .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1486                        .perm = perm,
1487                        .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1488                };
1489
1490                if (ssid_valid) {
1491                        flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1492                        flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1493                }
1494        } else {
1495                flt->type = IOMMU_FAULT_DMA_UNRECOV;
1496                flt->event = (struct iommu_fault_unrecoverable) {
1497                        .reason = reason,
1498                        .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1499                        .perm = perm,
1500                        .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1501                };
1502
1503                if (ssid_valid) {
1504                        flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1505                        flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1506                }
1507        }
1508
1509        mutex_lock(&smmu->streams_mutex);
1510        master = arm_smmu_find_master(smmu, sid);
1511        if (!master) {
1512                ret = -EINVAL;
1513                goto out_unlock;
1514        }
1515
1516        ret = iommu_report_device_fault(master->dev, &fault_evt);
1517        if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1518                /* Nobody cared, abort the access */
1519                struct iommu_page_response resp = {
1520                        .pasid          = flt->prm.pasid,
1521                        .grpid          = flt->prm.grpid,
1522                        .code           = IOMMU_PAGE_RESP_FAILURE,
1523                };
1524                arm_smmu_page_response(master->dev, &fault_evt, &resp);
1525        }
1526
1527out_unlock:
1528        mutex_unlock(&smmu->streams_mutex);
1529        return ret;
1530}
1531
1532static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1533{
1534        int i, ret;
1535        struct arm_smmu_device *smmu = dev;
1536        struct arm_smmu_queue *q = &smmu->evtq.q;
1537        struct arm_smmu_ll_queue *llq = &q->llq;
1538        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1539                                      DEFAULT_RATELIMIT_BURST);
1540        u64 evt[EVTQ_ENT_DWORDS];
1541
1542        do {
1543                while (!queue_remove_raw(q, evt)) {
1544                        u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1545
1546                        ret = arm_smmu_handle_evt(smmu, evt);
1547                        if (!ret || !__ratelimit(&rs))
1548                                continue;
1549
1550                        dev_info(smmu->dev, "event 0x%02x received:\n", id);
1551                        for (i = 0; i < ARRAY_SIZE(evt); ++i)
1552                                dev_info(smmu->dev, "\t0x%016llx\n",
1553                                         (unsigned long long)evt[i]);
1554
1555                }
1556
1557                /*
1558                 * Not much we can do on overflow, so scream and pretend we're
1559                 * trying harder.
1560                 */
1561                if (queue_sync_prod_in(q) == -EOVERFLOW)
1562                        dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1563        } while (!queue_empty(llq));
1564
1565        /* Sync our overflow flag, as we believe we're up to speed */
1566        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1567                    Q_IDX(llq, llq->cons);
1568        return IRQ_HANDLED;
1569}
1570
1571static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1572{
1573        u32 sid, ssid;
1574        u16 grpid;
1575        bool ssv, last;
1576
1577        sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1578        ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1579        ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1580        last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1581        grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1582
1583        dev_info(smmu->dev, "unexpected PRI request received:\n");
1584        dev_info(smmu->dev,
1585                 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1586                 sid, ssid, grpid, last ? "L" : "",
1587                 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1588                 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1589                 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1590                 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1591                 evt[1] & PRIQ_1_ADDR_MASK);
1592
1593        if (last) {
1594                struct arm_smmu_cmdq_ent cmd = {
1595                        .opcode                 = CMDQ_OP_PRI_RESP,
1596                        .substream_valid        = ssv,
1597                        .pri                    = {
1598                                .sid    = sid,
1599                                .ssid   = ssid,
1600                                .grpid  = grpid,
1601                                .resp   = PRI_RESP_DENY,
1602                        },
1603                };
1604
1605                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1606        }
1607}
1608
1609static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1610{
1611        struct arm_smmu_device *smmu = dev;
1612        struct arm_smmu_queue *q = &smmu->priq.q;
1613        struct arm_smmu_ll_queue *llq = &q->llq;
1614        u64 evt[PRIQ_ENT_DWORDS];
1615
1616        do {
1617                while (!queue_remove_raw(q, evt))
1618                        arm_smmu_handle_ppr(smmu, evt);
1619
1620                if (queue_sync_prod_in(q) == -EOVERFLOW)
1621                        dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1622        } while (!queue_empty(llq));
1623
1624        /* Sync our overflow flag, as we believe we're up to speed */
1625        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1626                      Q_IDX(llq, llq->cons);
1627        queue_sync_cons_out(q);
1628        return IRQ_HANDLED;
1629}
1630
1631static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1632
1633static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1634{
1635        u32 gerror, gerrorn, active;
1636        struct arm_smmu_device *smmu = dev;
1637
1638        gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1639        gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1640
1641        active = gerror ^ gerrorn;
1642        if (!(active & GERROR_ERR_MASK))
1643                return IRQ_NONE; /* No errors pending */
1644
1645        dev_warn(smmu->dev,
1646                 "unexpected global error reported (0x%08x), this could be serious\n",
1647                 active);
1648
1649        if (active & GERROR_SFM_ERR) {
1650                dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1651                arm_smmu_device_disable(smmu);
1652        }
1653
1654        if (active & GERROR_MSI_GERROR_ABT_ERR)
1655                dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1656
1657        if (active & GERROR_MSI_PRIQ_ABT_ERR)
1658                dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1659
1660        if (active & GERROR_MSI_EVTQ_ABT_ERR)
1661                dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1662
1663        if (active & GERROR_MSI_CMDQ_ABT_ERR)
1664                dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1665
1666        if (active & GERROR_PRIQ_ABT_ERR)
1667                dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1668
1669        if (active & GERROR_EVTQ_ABT_ERR)
1670                dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1671
1672        if (active & GERROR_CMDQ_ERR)
1673                arm_smmu_cmdq_skip_err(smmu);
1674
1675        writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1676        return IRQ_HANDLED;
1677}
1678
1679static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1680{
1681        struct arm_smmu_device *smmu = dev;
1682
1683        arm_smmu_evtq_thread(irq, dev);
1684        if (smmu->features & ARM_SMMU_FEAT_PRI)
1685                arm_smmu_priq_thread(irq, dev);
1686
1687        return IRQ_HANDLED;
1688}
1689
1690static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1691{
1692        arm_smmu_gerror_handler(irq, dev);
1693        return IRQ_WAKE_THREAD;
1694}
1695
1696static void
1697arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1698                        struct arm_smmu_cmdq_ent *cmd)
1699{
1700        size_t log2_span;
1701        size_t span_mask;
1702        /* ATC invalidates are always on 4096-byte pages */
1703        size_t inval_grain_shift = 12;
1704        unsigned long page_start, page_end;
1705
1706        /*
1707         * ATS and PASID:
1708         *
1709         * If substream_valid is clear, the PCIe TLP is sent without a PASID
1710         * prefix. In that case all ATC entries within the address range are
1711         * invalidated, including those that were requested with a PASID! There
1712         * is no way to invalidate only entries without PASID.
1713         *
1714         * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1715         * traffic), translation requests without PASID create ATC entries
1716         * without PASID, which must be invalidated with substream_valid clear.
1717         * This has the unpleasant side-effect of invalidating all PASID-tagged
1718         * ATC entries within the address range.
1719         */
1720        *cmd = (struct arm_smmu_cmdq_ent) {
1721                .opcode                 = CMDQ_OP_ATC_INV,
1722                .substream_valid        = !!ssid,
1723                .atc.ssid               = ssid,
1724        };
1725
1726        if (!size) {
1727                cmd->atc.size = ATC_INV_SIZE_ALL;
1728                return;
1729        }
1730
1731        page_start      = iova >> inval_grain_shift;
1732        page_end        = (iova + size - 1) >> inval_grain_shift;
1733
1734        /*
1735         * In an ATS Invalidate Request, the address must be aligned on the
1736         * range size, which must be a power of two number of page sizes. We
1737         * thus have to choose between grossly over-invalidating the region, or
1738         * splitting the invalidation into multiple commands. For simplicity
1739         * we'll go with the first solution, but should refine it in the future
1740         * if multiple commands are shown to be more efficient.
1741         *
1742         * Find the smallest power of two that covers the range. The most
1743         * significant differing bit between the start and end addresses,
1744         * fls(start ^ end), indicates the required span. For example:
1745         *
1746         * We want to invalidate pages [8; 11]. This is already the ideal range:
1747         *              x = 0b1000 ^ 0b1011 = 0b11
1748         *              span = 1 << fls(x) = 4
1749         *
1750         * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1751         *              x = 0b0111 ^ 0b1010 = 0b1101
1752         *              span = 1 << fls(x) = 16
1753         */
1754        log2_span       = fls_long(page_start ^ page_end);
1755        span_mask       = (1ULL << log2_span) - 1;
1756
1757        page_start      &= ~span_mask;
1758
1759        cmd->atc.addr   = page_start << inval_grain_shift;
1760        cmd->atc.size   = log2_span;
1761}
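/*
 * Minimal standalone sketch of the span computation above (plain C on page
 * numbers rather than byte addresses; not used by the driver):
 *
 *	static void atc_inv_span(unsigned long first_page,
 *				 unsigned long last_page,
 *				 unsigned long *base, unsigned int *log2span)
 *	{
 *		*log2span = fls_long(first_page ^ last_page);
 *		*base = first_page & ~((1UL << *log2span) - 1);
 *	}
 *
 * atc_inv_span(7, 10, &base, &span) gives base = 0, log2span = 4, i.e.
 * pages [0; 15] -- the second example in the comment above: one over-sized
 * invalidation rather than several exact ones.
 */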
1762
1763static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1764{
1765        int i;
1766        struct arm_smmu_cmdq_ent cmd;
1767        struct arm_smmu_cmdq_batch cmds = {};
1768
1769        arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1770
1771        for (i = 0; i < master->num_streams; i++) {
1772                cmd.atc.sid = master->streams[i].id;
1773                arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1774        }
1775
1776        return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1777}
1778
1779int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1780                            unsigned long iova, size_t size)
1781{
1782        int i;
1783        unsigned long flags;
1784        struct arm_smmu_cmdq_ent cmd;
1785        struct arm_smmu_master *master;
1786        struct arm_smmu_cmdq_batch cmds;
1787
1788        if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1789                return 0;
1790
1791        /*
1792         * Ensure that we've completed prior invalidation of the main TLBs
1793         * before we read 'nr_ats_masters' in case of a concurrent call to
1794         * arm_smmu_enable_ats():
1795         *
1796         *      // unmap()                      // arm_smmu_enable_ats()
1797         *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1798         *      smp_mb();                       [...]
1799         *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1800         *
1801         * Ensures that we always see the incremented 'nr_ats_masters' count if
1802         * ATS was enabled at the PCI device before completion of the TLBI.
1803         */
1804        smp_mb();
1805        if (!atomic_read(&smmu_domain->nr_ats_masters))
1806                return 0;
1807
1808        arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1809
1810        cmds.num = 0;
1811
1812        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1813        list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1814                if (!master->ats_enabled)
1815                        continue;
1816
1817                for (i = 0; i < master->num_streams; i++) {
1818                        cmd.atc.sid = master->streams[i].id;
1819                        arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1820                }
1821        }
1822        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1823
1824        return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1825}
1826
1827/* IO_PGTABLE API */
1828static void arm_smmu_tlb_inv_context(void *cookie)
1829{
1830        struct arm_smmu_domain *smmu_domain = cookie;
1831        struct arm_smmu_device *smmu = smmu_domain->smmu;
1832        struct arm_smmu_cmdq_ent cmd;
1833
1834        /*
1835         * NOTE: when io-pgtable is in non-strict mode, we may get here with
1836         * PTEs previously cleared by unmaps on the current CPU not yet visible
1837         * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1838         * insertion to guarantee those are observed before the TLBI. Do be
1839         * careful, 007.
1840         */
1841        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1842                arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1843        } else {
1844                cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1845                cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1846                arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1847        }
1848        arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1849}
1850
1851static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1852                                     unsigned long iova, size_t size,
1853                                     size_t granule,
1854                                     struct arm_smmu_domain *smmu_domain)
1855{
1856        struct arm_smmu_device *smmu = smmu_domain->smmu;
1857        unsigned long end = iova + size, num_pages = 0, tg = 0;
1858        size_t inv_range = granule;
1859        struct arm_smmu_cmdq_batch cmds;
1860
1861        if (!size)
1862                return;
1863
1864        if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1865                /* Get the leaf page size */
1866                tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1867
1868                /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1869                cmd->tlbi.tg = (tg - 10) / 2;
1870
1871                /* Determine what level the granule is at */
1872                cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1873
1874                num_pages = size >> tg;
1875        }
1876
1877        cmds.num = 0;
1878
1879        while (iova < end) {
1880                if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1881                        /*
1882                         * On each iteration of the loop, the range is 5 bits
1883                         * worth of the aligned size remaining.
1884                         * The range in pages is:
1885                         *
1886                         * range = (num_pages & (0x1f << __ffs(num_pages)))
1887                         */
1888                        unsigned long scale, num;
1889
1890                        /* Determine the power of 2 multiple number of pages */
1891                        scale = __ffs(num_pages);
1892                        cmd->tlbi.scale = scale;
1893
1894                        /* Determine how many chunks of 2^scale size we have */
1895                        num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1896                        cmd->tlbi.num = num - 1;
1897
1898                        /* range is num * 2^scale * pgsize */
1899                        inv_range = num << (scale + tg);
1900
1901                        /* Clear out the lower order bits for the next iteration */
1902                        num_pages -= num << scale;
1903                }
1904
1905                cmd->tlbi.addr = iova;
1906                arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1907                iova += inv_range;
1908        }
1909        arm_smmu_cmdq_batch_submit(smmu, &cmds);
1910}
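/*
 * Worked example for the ARM_SMMU_FEAT_RANGE_INV path above, assuming
 * 4KiB leaf pages (tg = 12) and an unmap covering 35 pages:
 *
 *	num_pages = 35 = 0b100011
 *
 *	iteration 1: scale = __ffs(35) = 0
 *	             num   = (35 >> 0) & 0x1f = 3   -> one TLBI for 3 pages
 *	             num_pages -= 3 << 0            -> 32 pages left
 *	iteration 2: scale = __ffs(32) = 5
 *	             num   = (32 >> 5) & 0x1f = 1   -> one TLBI for 32 pages
 *	             num_pages -= 1 << 5            -> done
 *
 * Two range commands replace 35 single-page ones. For the TG/TTL hint, a
 * 2MiB block with a 4KiB granule encodes as
 *	tg  = (12 - 10) / 2 = 1
 *	ttl = 4 - ((ilog2(SZ_2M) - 3) / (12 - 3)) = 4 - 18/9 = 2
 * telling the SMMU that the leaf entry sits at level 2.
 */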
1911
1912static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1913                                          size_t granule, bool leaf,
1914                                          struct arm_smmu_domain *smmu_domain)
1915{
1916        struct arm_smmu_cmdq_ent cmd = {
1917                .tlbi = {
1918                        .leaf   = leaf,
1919                },
1920        };
1921
1922        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1923                cmd.opcode      = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1924                                  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1925                cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1926        } else {
1927                cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1928                cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1929        }
1930        __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1931
1932        /*
1933         * Unfortunately, this can't be leaf-only since we may have
1934         * zapped an entire table.
1935         */
1936        arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1937}
1938
1939void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1940                                 size_t granule, bool leaf,
1941                                 struct arm_smmu_domain *smmu_domain)
1942{
1943        struct arm_smmu_cmdq_ent cmd = {
1944                .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1945                          CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1946                .tlbi = {
1947                        .asid   = asid,
1948                        .leaf   = leaf,
1949                },
1950        };
1951
1952        __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1953}
1954
1955static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1956                                         unsigned long iova, size_t granule,
1957                                         void *cookie)
1958{
1959        struct arm_smmu_domain *smmu_domain = cookie;
1960        struct iommu_domain *domain = &smmu_domain->domain;
1961
1962        iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1963}
1964
1965static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1966                                  size_t granule, void *cookie)
1967{
1968        arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1969}
1970
1971static const struct iommu_flush_ops arm_smmu_flush_ops = {
1972        .tlb_flush_all  = arm_smmu_tlb_inv_context,
1973        .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1974        .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1975};
1976
1977/* IOMMU API */
1978static bool arm_smmu_capable(enum iommu_cap cap)
1979{
1980        switch (cap) {
1981        case IOMMU_CAP_CACHE_COHERENCY:
1982                return true;
1983        case IOMMU_CAP_NOEXEC:
1984                return true;
1985        default:
1986                return false;
1987        }
1988}
1989
1990static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1991{
1992        struct arm_smmu_domain *smmu_domain;
1993
1994        if (type != IOMMU_DOMAIN_UNMANAGED &&
1995            type != IOMMU_DOMAIN_DMA &&
1996            type != IOMMU_DOMAIN_IDENTITY)
1997                return NULL;
1998
1999        /*
2000         * Allocate the domain and initialise some of its data structures.
2001         * We can't really do anything meaningful until we've added a
2002         * master.
2003         */
2004        smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2005        if (!smmu_domain)
2006                return NULL;
2007
2008        mutex_init(&smmu_domain->init_mutex);
2009        INIT_LIST_HEAD(&smmu_domain->devices);
2010        spin_lock_init(&smmu_domain->devices_lock);
2011        INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2012
2013        return &smmu_domain->domain;
2014}
2015
2016static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2017{
2018        int idx, size = 1 << span;
2019
2020        do {
2021                idx = find_first_zero_bit(map, size);
2022                if (idx == size)
2023                        return -ENOSPC;
2024        } while (test_and_set_bit(idx, map));
2025
2026        return idx;
2027}
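/*
 * The allocator above is lock-free: find_first_zero_bit() and
 * test_and_set_bit() are two separate steps, so another CPU can claim the
 * same index in between. test_and_set_bit() returning non-zero means we
 * lost that race, and the do/while simply searches again; -ENOSPC is only
 * returned once a full scan finds no clear bit.
 */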
2028
2029static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2030{
2031        clear_bit(idx, map);
2032}
2033
2034static void arm_smmu_domain_free(struct iommu_domain *domain)
2035{
2036        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2037        struct arm_smmu_device *smmu = smmu_domain->smmu;
2038
2039        free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2040
2041        /* Free the CD and ASID, if we allocated them */
2042        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2043                struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2044
2045                /* Prevent SVA from touching the CD while we're freeing it */
2046                mutex_lock(&arm_smmu_asid_lock);
2047                if (cfg->cdcfg.cdtab)
2048                        arm_smmu_free_cd_tables(smmu_domain);
2049                arm_smmu_free_asid(&cfg->cd);
2050                mutex_unlock(&arm_smmu_asid_lock);
2051        } else {
2052                struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2053                if (cfg->vmid)
2054                        arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2055        }
2056
2057        kfree(smmu_domain);
2058}
2059
2060static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2061                                       struct arm_smmu_master *master,
2062                                       struct io_pgtable_cfg *pgtbl_cfg)
2063{
2064        int ret;
2065        u32 asid;
2066        struct arm_smmu_device *smmu = smmu_domain->smmu;
2067        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2068        typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2069
2070        refcount_set(&cfg->cd.refs, 1);
2071
2072        /* Prevent SVA from modifying the ASID until it is written to the CD */
2073        mutex_lock(&arm_smmu_asid_lock);
2074        ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2075                       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2076        if (ret)
2077                goto out_unlock;
2078
2079        cfg->s1cdmax = master->ssid_bits;
2080
2081        smmu_domain->stall_enabled = master->stall_enabled;
2082
2083        ret = arm_smmu_alloc_cd_tables(smmu_domain);
2084        if (ret)
2085                goto out_free_asid;
2086
2087        cfg->cd.asid    = (u16)asid;
2088        cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2089        cfg->cd.tcr     = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2090                          FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2091                          FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2092                          FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2093                          FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2094                          FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2095                          CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2096        cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2097
2098        /*
2099         * Note that this will end up calling arm_smmu_sync_cd() before
2100         * the master has been added to the devices list for this domain.
2101         * This isn't an issue because the STE hasn't been installed yet.
2102         */
2103        ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2104        if (ret)
2105                goto out_free_cd_tables;
2106
2107        mutex_unlock(&arm_smmu_asid_lock);
2108        return 0;
2109
2110out_free_cd_tables:
2111        arm_smmu_free_cd_tables(smmu_domain);
2112out_free_asid:
2113        arm_smmu_free_asid(&cfg->cd);
2114out_unlock:
2115        mutex_unlock(&arm_smmu_asid_lock);
2116        return ret;
2117}
2118
2119static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2120                                       struct arm_smmu_master *master,
2121                                       struct io_pgtable_cfg *pgtbl_cfg)
2122{
2123        int vmid;
2124        struct arm_smmu_device *smmu = smmu_domain->smmu;
2125        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2126        typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2127
2128        vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2129        if (vmid < 0)
2130                return vmid;
2131
2132        vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2133        cfg->vmid       = (u16)vmid;
2134        cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2135        cfg->vtcr       = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2136                          FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2137                          FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2138                          FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2139                          FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2140                          FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2141                          FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2142        return 0;
2143}
2144
2145static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2146                                    struct arm_smmu_master *master)
2147{
2148        int ret;
2149        unsigned long ias, oas;
2150        enum io_pgtable_fmt fmt;
2151        struct io_pgtable_cfg pgtbl_cfg;
2152        struct io_pgtable_ops *pgtbl_ops;
2153        int (*finalise_stage_fn)(struct arm_smmu_domain *,
2154                                 struct arm_smmu_master *,
2155                                 struct io_pgtable_cfg *);
2156        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2157        struct arm_smmu_device *smmu = smmu_domain->smmu;
2158
2159        if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2160                smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2161                return 0;
2162        }
2163
2164        /* Restrict the stage to what we can actually support */
2165        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2166                smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2167        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2168                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2169
2170        switch (smmu_domain->stage) {
2171        case ARM_SMMU_DOMAIN_S1:
2172                ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2173                ias = min_t(unsigned long, ias, VA_BITS);
2174                oas = smmu->ias;
2175                fmt = ARM_64_LPAE_S1;
2176                finalise_stage_fn = arm_smmu_domain_finalise_s1;
2177                break;
2178        case ARM_SMMU_DOMAIN_NESTED:
2179        case ARM_SMMU_DOMAIN_S2:
2180                ias = smmu->ias;
2181                oas = smmu->oas;
2182                fmt = ARM_64_LPAE_S2;
2183                finalise_stage_fn = arm_smmu_domain_finalise_s2;
2184                break;
2185        default:
2186                return -EINVAL;
2187        }
2188
2189        pgtbl_cfg = (struct io_pgtable_cfg) {
2190                .pgsize_bitmap  = smmu->pgsize_bitmap,
2191                .ias            = ias,
2192                .oas            = oas,
2193                .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2194                .tlb            = &arm_smmu_flush_ops,
2195                .iommu_dev      = smmu->dev,
2196        };
2197
2198        if (!iommu_get_dma_strict(domain))
2199                pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2200
2201        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2202        if (!pgtbl_ops)
2203                return -ENOMEM;
2204
2205        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2206        domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2207        domain->geometry.force_aperture = true;
2208
2209        ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2210        if (ret < 0) {
2211                free_io_pgtable_ops(pgtbl_ops);
2212                return ret;
2213        }
2214
2215        smmu_domain->pgtbl_ops = pgtbl_ops;
2216        return 0;
2217}
2218
2219static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2220{
2221        __le64 *step;
2222        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2223
2224        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2225                struct arm_smmu_strtab_l1_desc *l1_desc;
2226                int idx;
2227
2228                /* Two-level walk */
2229                idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2230                l1_desc = &cfg->l1_desc[idx];
2231                idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2232                step = &l1_desc->l2ptr[idx];
2233        } else {
2234                /* Simple linear lookup */
2235                step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2236        }
2237
2238        return step;
2239}
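/*
 * Lookup example, assuming the usual STRTAB_SPLIT = 8,
 * STRTAB_L1_DESC_DWORDS = 1 and STRTAB_STE_DWORDS = 8:
 *
 *	sid = 0x1234
 *	L1: sid >> 8          = 0x12  -> cfg->l1_desc[0x12]
 *	L2: (sid & 0xff) * 8  = 0x1a0 -> &l2ptr[0x1a0], i.e. STE 0x34
 *	                                 within that L2 table
 *
 * A linear stream table skips the first step: the STE simply starts at
 * strtab[sid * STRTAB_STE_DWORDS].
 */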
2240
2241static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2242{
2243        int i, j;
2244        struct arm_smmu_device *smmu = master->smmu;
2245
2246        for (i = 0; i < master->num_streams; ++i) {
2247                u32 sid = master->streams[i].id;
2248                __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2249
2250                /* Bridged PCI devices may end up with duplicated IDs */
2251                for (j = 0; j < i; j++)
2252                        if (master->streams[j].id == sid)
2253                                break;
2254                if (j < i)
2255                        continue;
2256
2257                arm_smmu_write_strtab_ent(master, sid, step);
2258        }
2259}
2260
2261static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2262{
2263        struct device *dev = master->dev;
2264        struct arm_smmu_device *smmu = master->smmu;
2265        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2266
2267        if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2268                return false;
2269
2270        if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2271                return false;
2272
2273        return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2274}
2275
2276static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2277{
2278        size_t stu;
2279        struct pci_dev *pdev;
2280        struct arm_smmu_device *smmu = master->smmu;
2281        struct arm_smmu_domain *smmu_domain = master->domain;
2282
2283        /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2284        if (!master->ats_enabled)
2285                return;
2286
2287        /* Smallest Translation Unit: log2 of the smallest supported granule */
2288        stu = __ffs(smmu->pgsize_bitmap);
2289        pdev = to_pci_dev(master->dev);
2290
2291        atomic_inc(&smmu_domain->nr_ats_masters);
2292        arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2293        if (pci_enable_ats(pdev, stu))
2294                dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2295}
2296
2297static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2298{
2299        struct arm_smmu_domain *smmu_domain = master->domain;
2300
2301        if (!master->ats_enabled)
2302                return;
2303
2304        pci_disable_ats(to_pci_dev(master->dev));
2305        /*
2306         * Ensure ATS is disabled at the endpoint before we issue the
2307         * ATC invalidation via the SMMU.
2308         */
2309        wmb();
2310        arm_smmu_atc_inv_master(master);
2311        atomic_dec(&smmu_domain->nr_ats_masters);
2312}
2313
2314static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2315{
2316        int ret;
2317        int features;
2318        int num_pasids;
2319        struct pci_dev *pdev;
2320
2321        if (!dev_is_pci(master->dev))
2322                return -ENODEV;
2323
2324        pdev = to_pci_dev(master->dev);
2325
2326        features = pci_pasid_features(pdev);
2327        if (features < 0)
2328                return features;
2329
2330        num_pasids = pci_max_pasids(pdev);
2331        if (num_pasids <= 0)
2332                return num_pasids;
2333
2334        ret = pci_enable_pasid(pdev, features);
2335        if (ret) {
2336                dev_err(&pdev->dev, "Failed to enable PASID\n");
2337                return ret;
2338        }
2339
2340        master->ssid_bits = min_t(u8, ilog2(num_pasids),
2341                                  master->smmu->ssid_bits);
2342        return 0;
2343}
2344
2345static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2346{
2347        struct pci_dev *pdev;
2348
2349        if (!dev_is_pci(master->dev))
2350                return;
2351
2352        pdev = to_pci_dev(master->dev);
2353
2354        if (!pdev->pasid_enabled)
2355                return;
2356
2357        master->ssid_bits = 0;
2358        pci_disable_pasid(pdev);
2359}
2360
2361static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2362{
2363        unsigned long flags;
2364        struct arm_smmu_domain *smmu_domain = master->domain;
2365
2366        if (!smmu_domain)
2367                return;
2368
2369        arm_smmu_disable_ats(master);
2370
2371        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2372        list_del(&master->domain_head);
2373        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2374
2375        master->domain = NULL;
2376        master->ats_enabled = false;
2377        arm_smmu_install_ste_for_dev(master);
2378}
2379
2380static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2381{
2382        int ret = 0;
2383        unsigned long flags;
2384        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2385        struct arm_smmu_device *smmu;
2386        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2387        struct arm_smmu_master *master;
2388
2389        if (!fwspec)
2390                return -ENOENT;
2391
2392        master = dev_iommu_priv_get(dev);
2393        smmu = master->smmu;
2394
2395        /*
2396         * Checking that SVA is disabled ensures that this device isn't bound to
2397         * any mm, and can be safely detached from its old domain. Bonds cannot
2398         * be removed concurrently since we're holding the group mutex.
2399         */
2400        if (arm_smmu_master_sva_enabled(master)) {
2401                dev_err(dev, "cannot attach - SVA enabled\n");
2402                return -EBUSY;
2403        }
2404
2405        arm_smmu_detach_dev(master);
2406
2407        mutex_lock(&smmu_domain->init_mutex);
2408
2409        if (!smmu_domain->smmu) {
2410                smmu_domain->smmu = smmu;
2411                ret = arm_smmu_domain_finalise(domain, master);
2412                if (ret) {
2413                        smmu_domain->smmu = NULL;
2414                        goto out_unlock;
2415                }
2416        } else if (smmu_domain->smmu != smmu) {
2417                dev_err(dev,
2418                        "cannot attach to SMMU %s (upstream of %s)\n",
2419                        dev_name(smmu_domain->smmu->dev),
2420                        dev_name(smmu->dev));
2421                ret = -ENXIO;
2422                goto out_unlock;
2423        } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2424                   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2425                dev_err(dev,
2426                        "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2427                        smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2428                ret = -EINVAL;
2429                goto out_unlock;
2430        } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2431                   smmu_domain->stall_enabled != master->stall_enabled) {
2432                dev_err(dev, "cannot attach to stall-%s domain\n",
2433                        smmu_domain->stall_enabled ? "enabled" : "disabled");
2434                ret = -EINVAL;
2435                goto out_unlock;
2436        }
2437
2438        master->domain = smmu_domain;
2439
2440        if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2441                master->ats_enabled = arm_smmu_ats_supported(master);
2442
2443        arm_smmu_install_ste_for_dev(master);
2444
2445        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2446        list_add(&master->domain_head, &smmu_domain->devices);
2447        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2448
2449        arm_smmu_enable_ats(master);
2450
2451out_unlock:
2452        mutex_unlock(&smmu_domain->init_mutex);
2453        return ret;
2454}
2455
2456static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2457                              phys_addr_t paddr, size_t pgsize, size_t pgcount,
2458                              int prot, gfp_t gfp, size_t *mapped)
2459{
2460        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2461
2462        if (!ops)
2463                return -ENODEV;
2464
2465        return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2466}
2467
2468static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2469                                   size_t pgsize, size_t pgcount,
2470                                   struct iommu_iotlb_gather *gather)
2471{
2472        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2473        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2474
2475        if (!ops)
2476                return 0;
2477
2478        return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2479}
2480
2481static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2482{
2483        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2484
2485        if (smmu_domain->smmu)
2486                arm_smmu_tlb_inv_context(smmu_domain);
2487}
2488
2489static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2490                                struct iommu_iotlb_gather *gather)
2491{
2492        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2493
2494        if (!gather->pgsize)
2495                return;
2496
2497        arm_smmu_tlb_inv_range_domain(gather->start,
2498                                      gather->end - gather->start + 1,
2499                                      gather->pgsize, true, smmu_domain);
2500}
2501
2502static phys_addr_t
2503arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2504{
2505        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2506
2507        if (!ops)
2508                return 0;
2509
2510        return ops->iova_to_phys(ops, iova);
2511}
2512
2513static struct platform_driver arm_smmu_driver;
2514
2515static
2516struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2517{
2518        struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2519                                                          fwnode);
2520        put_device(dev);
2521        return dev ? dev_get_drvdata(dev) : NULL;
2522}
2523
2524static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2525{
2526        unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2527
2528        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2529                limit *= 1UL << STRTAB_SPLIT;
2530
2531        return sid < limit;
2532}
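/*
 * Example, assuming STRTAB_SPLIT = 8: a two-level stream table with
 * num_l1_ents = 64 L1 descriptors covers SIDs 0..16383 (64 * 256),
 * whereas a linear table only accepts SIDs below num_l1_ents.
 */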
2533
2534static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2535                                  struct arm_smmu_master *master)
2536{
2537        int i;
2538        int ret = 0;
2539        struct arm_smmu_stream *new_stream, *cur_stream;
2540        struct rb_node **new_node, *parent_node = NULL;
2541        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2542
2543        master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2544                                  GFP_KERNEL);
2545        if (!master->streams)
2546                return -ENOMEM;
2547        master->num_streams = fwspec->num_ids;
2548
2549        mutex_lock(&smmu->streams_mutex);
2550        for (i = 0; i < fwspec->num_ids; i++) {
2551                u32 sid = fwspec->ids[i];
2552
2553                new_stream = &master->streams[i];
2554                new_stream->id = sid;
2555                new_stream->master = master;
2556
2557                /*
2558                 * Check the SIDs are in range of the SMMU and our stream table
2559                 */
2560                if (!arm_smmu_sid_in_range(smmu, sid)) {
2561                        ret = -ERANGE;
2562                        break;
2563                }
2564
2565                /* Ensure l2 strtab is initialised */
2566                if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2567                        ret = arm_smmu_init_l2_strtab(smmu, sid);
2568                        if (ret)
2569                                break;
2570                }
2571
2572                /* Insert into SID tree */
2573                new_node = &(smmu->streams.rb_node);
2574                while (*new_node) {
2575                        cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2576                                              node);
2577                        parent_node = *new_node;
2578                        if (cur_stream->id > new_stream->id) {
2579                                new_node = &((*new_node)->rb_left);
2580                        } else if (cur_stream->id < new_stream->id) {
2581                                new_node = &((*new_node)->rb_right);
2582                        } else {
2583                                dev_warn(master->dev,
2584                                         "stream %u already in tree\n",
2585                                         cur_stream->id);
2586                                ret = -EINVAL;
2587                                break;
2588                        }
2589                }
2590                if (ret)
2591                        break;
2592
2593                rb_link_node(&new_stream->node, parent_node, new_node);
2594                rb_insert_color(&new_stream->node, &smmu->streams);
2595        }
2596
2597        if (ret) {
2598                for (i--; i >= 0; i--)
2599                        rb_erase(&master->streams[i].node, &smmu->streams);
2600                kfree(master->streams);
2601        }
2602        mutex_unlock(&smmu->streams_mutex);
2603
2604        return ret;
2605}
2606
2607static void arm_smmu_remove_master(struct arm_smmu_master *master)
2608{
2609        int i;
2610        struct arm_smmu_device *smmu = master->smmu;
2611        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2612
2613        if (!smmu || !master->streams)
2614                return;
2615
2616        mutex_lock(&smmu->streams_mutex);
2617        for (i = 0; i < fwspec->num_ids; i++)
2618                rb_erase(&master->streams[i].node, &smmu->streams);
2619        mutex_unlock(&smmu->streams_mutex);
2620
2621        kfree(master->streams);
2622}
2623
2624static struct iommu_ops arm_smmu_ops;
2625
2626static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2627{
2628        int ret;
2629        struct arm_smmu_device *smmu;
2630        struct arm_smmu_master *master;
2631        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2632
2633        if (!fwspec || fwspec->ops != &arm_smmu_ops)
2634                return ERR_PTR(-ENODEV);
2635
2636        if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2637                return ERR_PTR(-EBUSY);
2638
2639        smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2640        if (!smmu)
2641                return ERR_PTR(-ENODEV);
2642
2643        master = kzalloc(sizeof(*master), GFP_KERNEL);
2644        if (!master)
2645                return ERR_PTR(-ENOMEM);
2646
2647        master->dev = dev;
2648        master->smmu = smmu;
2649        INIT_LIST_HEAD(&master->bonds);
2650        dev_iommu_priv_set(dev, master);
2651
2652        ret = arm_smmu_insert_master(smmu, master);
2653        if (ret)
2654                goto err_free_master;
2655
2656        device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2657        master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2658
2659        /*
2660         * Note that PASID must be enabled before, and disabled after ATS:
2661         * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2662         *
2663         *   Behavior is undefined if this bit is Set and the value of the PASID
2664         *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2665         *   are changed.
2666         */
2667        arm_smmu_enable_pasid(master);
2668
2669        if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2670                master->ssid_bits = min_t(u8, master->ssid_bits,
2671                                          CTXDESC_LINEAR_CDMAX);
2672
2673        if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2674             device_property_read_bool(dev, "dma-can-stall")) ||
2675            smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2676                master->stall_enabled = true;
2677
2678        return &smmu->iommu;
2679
2680err_free_master:
2681        kfree(master);
2682        dev_iommu_priv_set(dev, NULL);
2683        return ERR_PTR(ret);
2684}
2685
2686static void arm_smmu_release_device(struct device *dev)
2687{
2688        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2689        struct arm_smmu_master *master;
2690
2691        if (!fwspec || fwspec->ops != &arm_smmu_ops)
2692                return;
2693
2694        master = dev_iommu_priv_get(dev);
2695        if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2696                iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2697        arm_smmu_detach_dev(master);
2698        arm_smmu_disable_pasid(master);
2699        arm_smmu_remove_master(master);
2700        kfree(master);
2701        iommu_fwspec_free(dev);
2702}
2703
2704static struct iommu_group *arm_smmu_device_group(struct device *dev)
2705{
2706        struct iommu_group *group;
2707
2708        /*
2709         * We don't support devices sharing stream IDs other than PCI RID
2710         * aliases, since the necessary ID-to-device lookup becomes rather
2711         * impractical given a potential sparse 32-bit stream ID space.
2712         */
2713        if (dev_is_pci(dev))
2714                group = pci_device_group(dev);
2715        else
2716                group = generic_device_group(dev);
2717
2718        return group;
2719}
2720
2721static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2722{
2723        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2724        int ret = 0;
2725
2726        mutex_lock(&smmu_domain->init_mutex);
2727        if (smmu_domain->smmu)
2728                ret = -EPERM;
2729        else
2730                smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2731        mutex_unlock(&smmu_domain->init_mutex);
2732
2733        return ret;
2734}
2735
2736static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2737{
2738        return iommu_fwspec_add_ids(dev, args->args, 1);
2739}
2740
2741static void arm_smmu_get_resv_regions(struct device *dev,
2742                                      struct list_head *head)
2743{
2744        struct iommu_resv_region *region;
2745        int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2746
2747        region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2748                                         prot, IOMMU_RESV_SW_MSI);
2749        if (!region)
2750                return;
2751
2752        list_add_tail(&region->list, head);
2753
2754        iommu_dma_get_resv_regions(dev, head);
2755}
2756
2757static bool arm_smmu_dev_has_feature(struct device *dev,
2758                                     enum iommu_dev_features feat)
2759{
2760        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2761
2762        if (!master)
2763                return false;
2764
2765        switch (feat) {
2766        case IOMMU_DEV_FEAT_IOPF:
2767                return arm_smmu_master_iopf_supported(master);
2768        case IOMMU_DEV_FEAT_SVA:
2769                return arm_smmu_master_sva_supported(master);
2770        default:
2771                return false;
2772        }
2773}
2774
2775static bool arm_smmu_dev_feature_enabled(struct device *dev,
2776                                         enum iommu_dev_features feat)
2777{
2778        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2779
2780        if (!master)
2781                return false;
2782
2783        switch (feat) {
2784        case IOMMU_DEV_FEAT_IOPF:
2785                return master->iopf_enabled;
2786        case IOMMU_DEV_FEAT_SVA:
2787                return arm_smmu_master_sva_enabled(master);
2788        default:
2789                return false;
2790        }
2791}
2792
2793static int arm_smmu_dev_enable_feature(struct device *dev,
2794                                       enum iommu_dev_features feat)
2795{
2796        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2797
2798        if (!arm_smmu_dev_has_feature(dev, feat))
2799                return -ENODEV;
2800
2801        if (arm_smmu_dev_feature_enabled(dev, feat))
2802                return -EBUSY;
2803
2804        switch (feat) {
2805        case IOMMU_DEV_FEAT_IOPF:
2806                master->iopf_enabled = true;
2807                return 0;
2808        case IOMMU_DEV_FEAT_SVA:
2809                return arm_smmu_master_enable_sva(master);
2810        default:
2811                return -EINVAL;
2812        }
2813}
2814
2815static int arm_smmu_dev_disable_feature(struct device *dev,
2816                                        enum iommu_dev_features feat)
2817{
2818        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2819
2820        if (!arm_smmu_dev_feature_enabled(dev, feat))
2821                return -EINVAL;
2822
2823        switch (feat) {
2824        case IOMMU_DEV_FEAT_IOPF:
2825                if (master->sva_enabled)
2826                        return -EBUSY;
2827                master->iopf_enabled = false;
2828                return 0;
2829        case IOMMU_DEV_FEAT_SVA:
2830                return arm_smmu_master_disable_sva(master);
2831        default:
2832                return -EINVAL;
2833        }
2834}
2835
2836static struct iommu_ops arm_smmu_ops = {
2837        .capable                = arm_smmu_capable,
2838        .domain_alloc           = arm_smmu_domain_alloc,
2839        .domain_free            = arm_smmu_domain_free,
2840        .attach_dev             = arm_smmu_attach_dev,
2841        .map_pages              = arm_smmu_map_pages,
2842        .unmap_pages            = arm_smmu_unmap_pages,
2843        .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2844        .iotlb_sync             = arm_smmu_iotlb_sync,
2845        .iova_to_phys           = arm_smmu_iova_to_phys,
2846        .probe_device           = arm_smmu_probe_device,
2847        .release_device         = arm_smmu_release_device,
2848        .device_group           = arm_smmu_device_group,
2849        .enable_nesting         = arm_smmu_enable_nesting,
2850        .of_xlate               = arm_smmu_of_xlate,
2851        .get_resv_regions       = arm_smmu_get_resv_regions,
2852        .put_resv_regions       = generic_iommu_put_resv_regions,
2853        .dev_has_feat           = arm_smmu_dev_has_feature,
2854        .dev_feat_enabled       = arm_smmu_dev_feature_enabled,
2855        .dev_enable_feat        = arm_smmu_dev_enable_feature,
2856        .dev_disable_feat       = arm_smmu_dev_disable_feature,
2857        .sva_bind               = arm_smmu_sva_bind,
2858        .sva_unbind             = arm_smmu_sva_unbind,
2859        .sva_get_pasid          = arm_smmu_sva_get_pasid,
2860        .page_response          = arm_smmu_page_response,
2861        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2862        .owner                  = THIS_MODULE,
2863};
2864
2865/* Probing and initialisation functions */
2866static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2867                                   struct arm_smmu_queue *q,
2868                                   void __iomem *page,
2869                                   unsigned long prod_off,
2870                                   unsigned long cons_off,
2871                                   size_t dwords, const char *name)
2872{
2873        size_t qsz;
2874
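            /*
             * The queue needs ((1 << max_n_shift) * dwords) << 3 bytes of
             * DMA-able memory, e.g. 2^8 entries of 2 dwords each come to
             * (256 * 2) << 3 = 4096 bytes. If the allocation fails, halve
             * the queue until it fits or until it drops below a page.
             */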
2875        do {
2876                qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2877                q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2878                                              GFP_KERNEL);
2879                if (q->base || qsz < PAGE_SIZE)
2880                        break;
2881
2882                q->llq.max_n_shift--;
2883        } while (1);
2884
2885        if (!q->base) {
2886                dev_err(smmu->dev,
2887                        "failed to allocate queue (0x%zx bytes) for %s\n",
2888                        qsz, name);
2889                return -ENOMEM;
2890        }
2891
2892        if (!WARN_ON(q->base_dma & (qsz - 1))) {
2893                dev_info(smmu->dev, "allocated %u entries for %s\n",
2894                         1 << q->llq.max_n_shift, name);
2895        }
2896
2897        q->prod_reg     = page + prod_off;
2898        q->cons_reg     = page + cons_off;
2899        q->ent_dwords   = dwords;
2900
2901        q->q_base  = Q_BASE_RWA;
2902        q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2903        q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2904
2905        q->llq.prod = q->llq.cons = 0;
2906        return 0;
2907}
2908
2909static void arm_smmu_cmdq_free_bitmap(void *data)
2910{
2911        unsigned long *bitmap = data;
2912        bitmap_free(bitmap);
2913}
2914
2915static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2916{
2917        int ret = 0;
2918        struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2919        unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2920        atomic_long_t *bitmap;
2921
2922        atomic_set(&cmdq->owner_prod, 0);
2923        atomic_set(&cmdq->lock, 0);
2924
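            /*
             * One bit per command-queue entry; the lock-free submission path
             * uses it to track which slots currently hold valid commands.
             */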
2925        bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2926        if (!bitmap) {
2927                dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2928                ret = -ENOMEM;
2929        } else {
2930                cmdq->valid_map = bitmap;
2931                devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2932        }
2933
2934        return ret;
2935}
2936
2937static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2938{
2939        int ret;
2940
2941        /* cmdq */
2942        ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2943                                      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2944                                      CMDQ_ENT_DWORDS, "cmdq");
2945        if (ret)
2946                return ret;
2947
2948        ret = arm_smmu_cmdq_init(smmu);
2949        if (ret)
2950                return ret;
2951
2952        /* evtq */
2953        ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2954                                      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2955                                      EVTQ_ENT_DWORDS, "evtq");
2956        if (ret)
2957                return ret;
2958
2959        if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2960            (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2961                smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2962                if (!smmu->evtq.iopf)
2963                        return -ENOMEM;
2964        }
2965
2966        /* priq */
2967        if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2968                return 0;
2969
2970        return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2971                                       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2972                                       PRIQ_ENT_DWORDS, "priq");
2973}
2974
2975static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2976{
2977        unsigned int i;
2978        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2979        size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2980        void *strtab = smmu->strtab_cfg.strtab;
2981
2982        cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2983        if (!cfg->l1_desc)
2984                return -ENOMEM;
2985
2986        for (i = 0; i < cfg->num_l1_ents; ++i) {
2987                arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2988                strtab += STRTAB_L1_DESC_DWORDS << 3;
2989        }
2990
2991        return 0;
2992}
2993
2994static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2995{
2996        void *strtab;
2997        u64 reg;
2998        u32 size, l1size;
2999        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3000
3001        /* Calculate the L1 size, capped to the SIDSIZE. */
3002        size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3003        size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3004        cfg->num_l1_ents = 1 << size;
3005
3006        size += STRTAB_SPLIT;
3007        if (size < smmu->sid_bits)
3008                dev_warn(smmu->dev,
3009                         "2-level strtab only covers %u/%u bits of SID\n",
3010                         size, smmu->sid_bits);
3011
3012        l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
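            /*
             * For example, with the usual 8-bit STRTAB_SPLIT and 8-byte L1
             * descriptors, a 16-bit StreamID space needs 1 << (16 - 8) = 256
             * L1 entries, i.e. a 2KB L1 table.
             */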
3013        strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3014                                     GFP_KERNEL);
3015        if (!strtab) {
3016                dev_err(smmu->dev,
3017                        "failed to allocate l1 stream table (%u bytes)\n",
3018                        l1size);
3019                return -ENOMEM;
3020        }
3021        cfg->strtab = strtab;
3022
3023        /* Configure strtab_base_cfg for 2 levels */
3024        reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3025        reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3026        reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3027        cfg->strtab_base_cfg = reg;
3028
3029        return arm_smmu_init_l1_strtab(smmu);
3030}
3031
3032static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3033{
3034        void *strtab;
3035        u64 reg;
3036        u32 size;
3037        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3038
3039        size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
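            /* e.g. a 16-bit StreamID space with 64-byte STEs needs a 4MB table */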
3040        strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3041                                     GFP_KERNEL);
3042        if (!strtab) {
3043                dev_err(smmu->dev,
3044                        "failed to allocate linear stream table (%u bytes)\n",
3045                        size);
3046                return -ENOMEM;
3047        }
3048        cfg->strtab = strtab;
3049        cfg->num_l1_ents = 1 << smmu->sid_bits;
3050
3051        /* Configure strtab_base_cfg for a linear table covering all SIDs */
3052        reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3053        reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3054        cfg->strtab_base_cfg = reg;
3055
3056        arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3057        return 0;
3058}
3059
3060static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3061{
3062        u64 reg;
3063        int ret;
3064
3065        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3066                ret = arm_smmu_init_strtab_2lvl(smmu);
3067        else
3068                ret = arm_smmu_init_strtab_linear(smmu);
3069
3070        if (ret)
3071                return ret;
3072
3073        /* Set the strtab base address */
3074        reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3075        reg |= STRTAB_BASE_RA;
3076        smmu->strtab_cfg.strtab_base = reg;
3077
3078        /* Allocate the first VMID for stage-2 bypass STEs */
3079        set_bit(0, smmu->vmid_map);
3080        return 0;
3081}
3082
3083static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3084{
3085        int ret;
3086
3087        mutex_init(&smmu->streams_mutex);
3088        smmu->streams = RB_ROOT;
3089
3090        ret = arm_smmu_init_queues(smmu);
3091        if (ret)
3092                return ret;
3093
3094        return arm_smmu_init_strtab(smmu);
3095}
3096
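    /*
     * Write val to the register at reg_off and poll the acknowledgement
     * register at ack_off until it reads back the same value, or until
     * ARM_SMMU_POLL_TIMEOUT_US expires.
     */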
3097static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3098                                   unsigned int reg_off, unsigned int ack_off)
3099{
3100        u32 reg;
3101
3102        writel_relaxed(val, smmu->base + reg_off);
3103        return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3104                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3105}
3106
3107/* GBPA is "special": updates are handshaked through the GBPA.Update bit */
3108static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3109{
3110        int ret;
3111        u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3112
3113        ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3114                                         1, ARM_SMMU_POLL_TIMEOUT_US);
3115        if (ret)
3116                return ret;
3117
3118        reg &= ~clr;
3119        reg |= set;
3120        writel_relaxed(reg | GBPA_UPDATE, gbpa);
3121        ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3122                                         1, ARM_SMMU_POLL_TIMEOUT_US);
3123
3124        if (ret)
3125                dev_err(smmu->dev, "GBPA not responding to update\n");
3126        return ret;
3127}
3128
3129static void arm_smmu_free_msis(void *data)
3130{
3131        struct device *dev = data;
3132        platform_msi_domain_free_irqs(dev);
3133}
3134
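    /*
     * Program one of the EVTQ/GERROR/PRIQ IRQ_CFG register triples: the MSI
     * doorbell address goes in CFG0, the payload in CFG1 and the memory
     * attributes in CFG2.
     */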
3135static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3136{
3137        phys_addr_t doorbell;
3138        struct device *dev = msi_desc_to_dev(desc);
3139        struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3140        phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3141
3142        doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3143        doorbell &= MSI_CFG0_ADDR_MASK;
3144
3145        writeq_relaxed(doorbell, smmu->base + cfg[0]);
3146        writel_relaxed(msg->data, smmu->base + cfg[1]);
3147        writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3148}
3149
3150static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3151{
3152        struct msi_desc *desc;
3153        int ret, nvec = ARM_SMMU_MAX_MSIS;
3154        struct device *dev = smmu->dev;
3155
3156        /* Clear the MSI address regs */
3157        writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3158        writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3159
3160        if (smmu->features & ARM_SMMU_FEAT_PRI)
3161                writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3162        else
3163                nvec--;
3164
3165        if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3166                return;
3167
3168        if (!dev->msi_domain) {
3169                dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3170                return;
3171        }
3172
3173        /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3174        ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3175        if (ret) {
3176                dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3177                return;
3178        }
3179
3180        for_each_msi_entry(desc, dev) {
3181                switch (desc->platform.msi_index) {
3182                case EVTQ_MSI_INDEX:
3183                        smmu->evtq.q.irq = desc->irq;
3184                        break;
3185                case GERROR_MSI_INDEX:
3186                        smmu->gerr_irq = desc->irq;
3187                        break;
3188                case PRIQ_MSI_INDEX:
3189                        smmu->priq.q.irq = desc->irq;
3190                        break;
3191                default:        /* Unknown */
3192                        continue;
3193                }
3194        }
3195
3196        /* Add callback to free MSIs on teardown */
3197        devm_add_action(dev, arm_smmu_free_msis, dev);
3198}
3199
3200static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3201{
3202        int irq, ret;
3203
3204        arm_smmu_setup_msis(smmu);
3205
3206        /* Request interrupt lines */
3207        irq = smmu->evtq.q.irq;
3208        if (irq) {
3209                ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3210                                                arm_smmu_evtq_thread,
3211                                                IRQF_ONESHOT,
3212                                                "arm-smmu-v3-evtq", smmu);
3213                if (ret < 0)
3214                        dev_warn(smmu->dev, "failed to enable evtq irq\n");
3215        } else {
3216                dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3217        }
3218
3219        irq = smmu->gerr_irq;
3220        if (irq) {
3221                ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3222                                       0, "arm-smmu-v3-gerror", smmu);
3223                if (ret < 0)
3224                        dev_warn(smmu->dev, "failed to enable gerror irq\n");
3225        } else {
3226                dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3227        }
3228
3229        if (smmu->features & ARM_SMMU_FEAT_PRI) {
3230                irq = smmu->priq.q.irq;
3231                if (irq) {
3232                        ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3233                                                        arm_smmu_priq_thread,
3234                                                        IRQF_ONESHOT,
3235                                                        "arm-smmu-v3-priq",
3236                                                        smmu);
3237                        if (ret < 0)
3238                                dev_warn(smmu->dev,
3239                                         "failed to enable priq irq\n");
3240                } else {
3241                        dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3242                }
3243        }
3244}
3245
3246static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3247{
3248        int ret, irq;
3249        u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3250
3251        /* Disable IRQs first */
3252        ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3253                                      ARM_SMMU_IRQ_CTRLACK);
3254        if (ret) {
3255                dev_err(smmu->dev, "failed to disable irqs\n");
3256                return ret;
3257        }
3258
3259        irq = smmu->combined_irq;
3260        if (irq) {
3261                /*
3262                 * Cavium ThunderX2 implementation doesn't support unique irq
3263                 * lines. Use a single irq line for all the SMMUv3 interrupts.
3264                 */
3265                ret = devm_request_threaded_irq(smmu->dev, irq,
3266                                        arm_smmu_combined_irq_handler,
3267                                        arm_smmu_combined_irq_thread,
3268                                        IRQF_ONESHOT,
3269                                        "arm-smmu-v3-combined-irq", smmu);
3270                if (ret < 0)
3271                        dev_warn(smmu->dev, "failed to enable combined irq\n");
3272        } else
3273                arm_smmu_setup_unique_irqs(smmu);
3274
3275        if (smmu->features & ARM_SMMU_FEAT_PRI)
3276                irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3277
3278        /* Enable interrupt generation on the SMMU */
3279        ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3280                                      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3281        if (ret)
3282                dev_warn(smmu->dev, "failed to enable irqs\n");
3283
3284        return 0;
3285}
3286
3287static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3288{
3289        int ret;
3290
3291        ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3292        if (ret)
3293                dev_err(smmu->dev, "failed to clear cr0\n");
3294
3295        return ret;
3296}
3297
3298static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3299{
3300        int ret;
3301        u32 reg, enables;
3302        struct arm_smmu_cmdq_ent cmd;
3303
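            /*
             * Bring the SMMU up in stages: ensure it is disabled, program the
             * table/queue attributes and base registers, enable the command
             * queue and invalidate any cached configuration and TLB state,
             * then enable the event/PRI queues, interrupts and finally
             * translation (or bypass).
             */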
3304        /* Clear CR0 and sync (disables SMMU and queue processing) */
3305        reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3306        if (reg & CR0_SMMUEN) {
3307                dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3308                WARN_ON(is_kdump_kernel() && !disable_bypass);
3309                arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3310        }
3311
3312        ret = arm_smmu_device_disable(smmu);
3313        if (ret)
3314                return ret;
3315
3316        /* CR1 (table and queue memory attributes) */
3317        reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3318              FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3319              FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3320              FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3321              FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3322              FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3323        writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3324
3325        /* CR2 (miscellaneous configuration) */
3326        reg = CR2_PTM | CR2_RECINVSID;
3327
3328        if (smmu->features & ARM_SMMU_FEAT_E2H)
3329                reg |= CR2_E2H;
3330
3331        writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3332
3333        /* Stream table */
3334        writeq_relaxed(smmu->strtab_cfg.strtab_base,
3335                       smmu->base + ARM_SMMU_STRTAB_BASE);
3336        writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3337                       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3338
3339        /* Command queue */
3340        writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3341        writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3342        writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3343
3344        enables = CR0_CMDQEN;
3345        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3346                                      ARM_SMMU_CR0ACK);
3347        if (ret) {
3348                dev_err(smmu->dev, "failed to enable command queue\n");
3349                return ret;
3350        }
3351
3352        /* Invalidate any cached configuration */
3353        cmd.opcode = CMDQ_OP_CFGI_ALL;
3354        arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3355
3356        /* Invalidate any stale TLB entries */
3357        if (smmu->features & ARM_SMMU_FEAT_HYP) {
3358                cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3359                arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3360        }
3361
3362        cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3363        arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3364
3365        /* Event queue */
3366        writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3367        writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3368        writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3369
3370        enables |= CR0_EVTQEN;
3371        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3372                                      ARM_SMMU_CR0ACK);
3373        if (ret) {
3374                dev_err(smmu->dev, "failed to enable event queue\n");
3375                return ret;
3376        }
3377
3378        /* PRI queue */
3379        if (smmu->features & ARM_SMMU_FEAT_PRI) {
3380                writeq_relaxed(smmu->priq.q.q_base,
3381                               smmu->base + ARM_SMMU_PRIQ_BASE);
3382                writel_relaxed(smmu->priq.q.llq.prod,
3383                               smmu->page1 + ARM_SMMU_PRIQ_PROD);
3384                writel_relaxed(smmu->priq.q.llq.cons,
3385                               smmu->page1 + ARM_SMMU_PRIQ_CONS);
3386
3387                enables |= CR0_PRIQEN;
3388                ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3389                                              ARM_SMMU_CR0ACK);
3390                if (ret) {
3391                        dev_err(smmu->dev, "failed to enable PRI queue\n");
3392                        return ret;
3393                }
3394        }
3395
3396        if (smmu->features & ARM_SMMU_FEAT_ATS) {
3397                enables |= CR0_ATSCHK;
3398                ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3399                                              ARM_SMMU_CR0ACK);
3400                if (ret) {
3401                        dev_err(smmu->dev, "failed to enable ATS check\n");
3402                        return ret;
3403                }
3404        }
3405
3406        ret = arm_smmu_setup_irqs(smmu);
3407        if (ret) {
3408                dev_err(smmu->dev, "failed to setup irqs\n");
3409                return ret;
3410        }
3411
3412        if (is_kdump_kernel())
3413                enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3414
3415        /* Enable the SMMU interface, or ensure bypass */
3416        if (!bypass || disable_bypass) {
3417                enables |= CR0_SMMUEN;
3418        } else {
3419                ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3420                if (ret)
3421                        return ret;
3422        }
3423        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3424                                      ARM_SMMU_CR0ACK);
3425        if (ret) {
3426                dev_err(smmu->dev, "failed to enable SMMU interface\n");
3427                return ret;
3428        }
3429
3430        return 0;
3431}
3432
3433static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3434{
3435        u32 reg;
3436        bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3437
3438        /* IDR0 */
3439        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3440
3441        /* 2-level structures */
3442        if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3443                smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3444
3445        if (reg & IDR0_CD2L)
3446                smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3447
3448        /*
3449         * Translation table endianness.
3450         * We currently require the same endianness as the CPU, but this
3451         * could be changed later by adding a new IO_PGTABLE_QUIRK.
3452         */
3453        switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3454        case IDR0_TTENDIAN_MIXED:
3455                smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3456                break;
3457#ifdef __BIG_ENDIAN
3458        case IDR0_TTENDIAN_BE:
3459                smmu->features |= ARM_SMMU_FEAT_TT_BE;
3460                break;
3461#else
3462        case IDR0_TTENDIAN_LE:
3463                smmu->features |= ARM_SMMU_FEAT_TT_LE;
3464                break;
3465#endif
3466        default:
3467                dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3468                return -ENXIO;
3469        }
3470
3471        /* Boolean feature flags */
3472        if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3473                smmu->features |= ARM_SMMU_FEAT_PRI;
3474
3475        if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3476                smmu->features |= ARM_SMMU_FEAT_ATS;
3477
3478        if (reg & IDR0_SEV)
3479                smmu->features |= ARM_SMMU_FEAT_SEV;
3480
3481        if (reg & IDR0_MSI) {
3482                smmu->features |= ARM_SMMU_FEAT_MSI;
3483                if (coherent && !disable_msipolling)
3484                        smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3485        }
3486
3487        if (reg & IDR0_HYP) {
3488                smmu->features |= ARM_SMMU_FEAT_HYP;
3489                if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3490                        smmu->features |= ARM_SMMU_FEAT_E2H;
3491        }
3492
3493        /*
3494         * The coherency feature as set by FW is used in preference to the ID
3495         * register, but warn on mismatch.
3496         */
3497        if (!!(reg & IDR0_COHACC) != coherent)
3498                dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3499                         coherent ? "true" : "false");
3500
3501        switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3502        case IDR0_STALL_MODEL_FORCE:
3503                smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3504                fallthrough;
3505        case IDR0_STALL_MODEL_STALL:
3506                smmu->features |= ARM_SMMU_FEAT_STALLS;
3507        }
3508
3509        if (reg & IDR0_S1P)
3510                smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3511
3512        if (reg & IDR0_S2P)
3513                smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3514
3515        if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3516                dev_err(smmu->dev, "no translation support!\n");
3517                return -ENXIO;
3518        }
3519
3520        /* We only support the AArch64 table format at present */
3521        switch (FIELD_GET(IDR0_TTF, reg)) {
3522        case IDR0_TTF_AARCH32_64:
3523                smmu->ias = 40;
3524                fallthrough;
3525        case IDR0_TTF_AARCH64:
3526                break;
3527        default:
3528                dev_err(smmu->dev, "AArch64 table format not supported!\n");
3529                return -ENXIO;
3530        }
3531
3532        /* ASID/VMID sizes */
3533        smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3534        smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3535
3536        /* IDR1 */
3537        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3538        if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3539                dev_err(smmu->dev, "embedded implementation not supported\n");
3540                return -ENXIO;
3541        }
3542
3543        /* Queue sizes, capped to ensure natural alignment */
3544        smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3545                                             FIELD_GET(IDR1_CMDQS, reg));
3546        if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3547                /*
3548                 * We don't support splitting up batches, so one batch of
3549                 * commands plus an extra sync needs to fit inside the command
3550                 * queue. There's also no way we can handle the weird alignment
3551                 * restrictions on the base pointer for a unit-length queue.
3552                 */
3553                dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3554                        CMDQ_BATCH_ENTRIES);
3555                return -ENXIO;
3556        }
3557
3558        smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3559                                             FIELD_GET(IDR1_EVTQS, reg));
3560        smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3561                                             FIELD_GET(IDR1_PRIQS, reg));
3562
3563        /* SID/SSID sizes */
3564        smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3565        smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3566
3567        /*
3568         * If the SMMU supports fewer bits than would fill a single L2 stream
3569         * table, use a linear table instead.
3570         */
3571        if (smmu->sid_bits <= STRTAB_SPLIT)
3572                smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3573
3574        /* IDR3 */
3575        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3576        if (FIELD_GET(IDR3_RIL, reg))
3577                smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3578
3579        /* IDR5 */
3580        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3581
3582        /* Maximum number of outstanding stalls */
3583        smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3584
3585        /* Page sizes */
3586        if (reg & IDR5_GRAN64K)
3587                smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3588        if (reg & IDR5_GRAN16K)
3589                smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3590        if (reg & IDR5_GRAN4K)
3591                smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3592
3593        /* Input address size */
3594        if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3595                smmu->features |= ARM_SMMU_FEAT_VAX;
3596
3597        /* Output address size */
3598        switch (FIELD_GET(IDR5_OAS, reg)) {
3599        case IDR5_OAS_32_BIT:
3600                smmu->oas = 32;
3601                break;
3602        case IDR5_OAS_36_BIT:
3603                smmu->oas = 36;
3604                break;
3605        case IDR5_OAS_40_BIT:
3606                smmu->oas = 40;
3607                break;
3608        case IDR5_OAS_42_BIT:
3609                smmu->oas = 42;
3610                break;
3611        case IDR5_OAS_44_BIT:
3612                smmu->oas = 44;
3613                break;
3614        case IDR5_OAS_52_BIT:
3615                smmu->oas = 52;
3616                smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3617                break;
3618        default:
3619                dev_info(smmu->dev,
3620                        "unknown output address size. Truncating to 48-bit\n");
3621                fallthrough;
3622        case IDR5_OAS_48_BIT:
3623                smmu->oas = 48;
3624        }
3625
3626        if (arm_smmu_ops.pgsize_bitmap == -1UL)
3627                arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3628        else
3629                arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3630
3631        /* Set the DMA mask for our table walker */
3632        if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3633                dev_warn(smmu->dev,
3634                         "failed to set DMA mask for table walker\n");
3635
3636        smmu->ias = max(smmu->ias, smmu->oas);
3637
3638        if (arm_smmu_sva_supported(smmu))
3639                smmu->features |= ARM_SMMU_FEAT_SVA;
3640
3641        dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3642                 smmu->ias, smmu->oas, smmu->features);
3643        return 0;
3644}
3645
3646#ifdef CONFIG_ACPI
3647static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3648{
3649        switch (model) {
3650        case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3651                smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3652                break;
3653        case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3654                smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3655                break;
3656        }
3657
3658        dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3659}
3660
3661static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3662                                      struct arm_smmu_device *smmu)
3663{
3664        struct acpi_iort_smmu_v3 *iort_smmu;
3665        struct device *dev = smmu->dev;
3666        struct acpi_iort_node *node;
3667
3668        node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3669
3670        /* Retrieve SMMUv3 specific data */
3671        iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3672
3673        acpi_smmu_get_options(iort_smmu->model, smmu);
3674
3675        if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3676                smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3677
3678        return 0;
3679}
3680#else
3681static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3682                                             struct arm_smmu_device *smmu)
3683{
3684        return -ENODEV;
3685}
3686#endif
3687
3688static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3689                                    struct arm_smmu_device *smmu)
3690{
3691        struct device *dev = &pdev->dev;
3692        u32 cells;
3693        int ret = -EINVAL;
3694
3695        if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3696                dev_err(dev, "missing #iommu-cells property\n");
3697        else if (cells != 1)
3698                dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3699        else
3700                ret = 0;
3701
3702        parse_driver_options(smmu);
3703
3704        if (of_dma_is_coherent(dev->of_node))
3705                smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3706
3707        return ret;
3708}
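    /*
     * For reference, a minimal device-tree node for this driver looks roughly
     * like the sketch below (the unit address is made up, and a single
     * "combined" interrupt may be provided instead of separate lines):
     *
     *        smmu@2b400000 {
     *                compatible = "arm,smmu-v3";
     *                reg = <0x0 0x2b400000 0x0 0x20000>;
     *                interrupts = <...>, <...>, <...>;
     *                interrupt-names = "eventq", "gerror", "priq";
     *                #iommu-cells = <1>;
     *                dma-coherent;
     *        };
     */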
3709
3710static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3711{
3712        if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3713                return SZ_64K;
3714        else
3715                return SZ_128K;
3716}
3717
3718static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3719{
3720        int err;
3721
3722#ifdef CONFIG_PCI
3723        if (pci_bus_type.iommu_ops != ops) {
3724                err = bus_set_iommu(&pci_bus_type, ops);
3725                if (err)
3726                        return err;
3727        }
3728#endif
3729#ifdef CONFIG_ARM_AMBA
3730        if (amba_bustype.iommu_ops != ops) {
3731                err = bus_set_iommu(&amba_bustype, ops);
3732                if (err)
3733                        goto err_reset_pci_ops;
3734        }
3735#endif
3736        if (platform_bus_type.iommu_ops != ops) {
3737                err = bus_set_iommu(&platform_bus_type, ops);
3738                if (err)
3739                        goto err_reset_amba_ops;
3740        }
3741
3742        return 0;
3743
3744err_reset_amba_ops:
3745#ifdef CONFIG_ARM_AMBA
3746        bus_set_iommu(&amba_bustype, NULL);
3747#endif
3748err_reset_pci_ops: __maybe_unused;
3749#ifdef CONFIG_PCI
3750        bus_set_iommu(&pci_bus_type, NULL);
3751#endif
3752        return err;
3753}
3754
3755static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3756                                      resource_size_t size)
3757{
3758        struct resource res = DEFINE_RES_MEM(start, size);
3759
3760        return devm_ioremap_resource(dev, &res);
3761}
3762
3763static int arm_smmu_device_probe(struct platform_device *pdev)
3764{
3765        int irq, ret;
3766        struct resource *res;
3767        resource_size_t ioaddr;
3768        struct arm_smmu_device *smmu;
3769        struct device *dev = &pdev->dev;
3770        bool bypass;
3771
3772        smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3773        if (!smmu)
3774                return -ENOMEM;
3775        smmu->dev = dev;
3776
3777        if (dev->of_node) {
3778                ret = arm_smmu_device_dt_probe(pdev, smmu);
3779        } else {
3780                ret = arm_smmu_device_acpi_probe(pdev, smmu);
3781                if (ret == -ENODEV)
3782                        return ret;
3783        }
3784
3785        /* Set bypass mode according to firmware probing result */
3786        bypass = !!ret;
3787
3788        /* Base address */
3789        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3790        if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3791                dev_err(dev, "MMIO region too small (%pr)\n", res);
3792                return -EINVAL;
3793        }
3794        ioaddr = res->start;
3795
3796        /*
3797         * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3798         * the PMCG registers which are reserved by the PMU driver.
3799         */
3800        smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3801        if (IS_ERR(smmu->base))
3802                return PTR_ERR(smmu->base);
3803
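            /*
             * Register page 1 normally sits 64K above page 0 and carries the
             * event/PRI queue index registers. ARM_SMMU_OPT_PAGE0_REGS_ONLY
             * implementations expose those registers within page 0 instead,
             * so the base mapping is simply reused for page1.
             */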
3804        if (arm_smmu_resource_size(smmu) > SZ_64K) {
3805                smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3806                                               ARM_SMMU_REG_SZ);
3807                if (IS_ERR(smmu->page1))
3808                        return PTR_ERR(smmu->page1);
3809        } else {
3810                smmu->page1 = smmu->base;
3811        }
3812
3813        /* Interrupt lines */
3814
3815        irq = platform_get_irq_byname_optional(pdev, "combined");
3816        if (irq > 0)
3817                smmu->combined_irq = irq;
3818        else {
3819                irq = platform_get_irq_byname_optional(pdev, "eventq");
3820                if (irq > 0)
3821                        smmu->evtq.q.irq = irq;
3822
3823                irq = platform_get_irq_byname_optional(pdev, "priq");
3824                if (irq > 0)
3825                        smmu->priq.q.irq = irq;
3826
3827                irq = platform_get_irq_byname_optional(pdev, "gerror");
3828                if (irq > 0)
3829                        smmu->gerr_irq = irq;
3830        }
3831        /* Probe the hardware */
3832        ret = arm_smmu_device_hw_probe(smmu);
3833        if (ret)
3834                return ret;
3835
3836        /* Initialise in-memory data structures */
3837        ret = arm_smmu_init_structures(smmu);
3838        if (ret)
3839                return ret;
3840
3841        /* Record our private device structure */
3842        platform_set_drvdata(pdev, smmu);
3843
3844        /* Reset the device */
3845        ret = arm_smmu_device_reset(smmu, bypass);
3846        if (ret)
3847                return ret;
3848
3849        /* The SMMU is up: register it with the IOMMU core and the buses */
3850        ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3851                                     "smmu3.%pa", &ioaddr);
3852        if (ret)
3853                return ret;
3854
3855        ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3856        if (ret) {
3857                dev_err(dev, "Failed to register iommu\n");
3858                goto err_sysfs_remove;
3859        }
3860
3861        ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3862        if (ret)
3863                goto err_unregister_device;
3864
3865        return 0;
3866
3867err_unregister_device:
3868        iommu_device_unregister(&smmu->iommu);
3869err_sysfs_remove:
3870        iommu_device_sysfs_remove(&smmu->iommu);
3871        return ret;
3872}
3873
3874static int arm_smmu_device_remove(struct platform_device *pdev)
3875{
3876        struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3877
3878        arm_smmu_set_bus_ops(NULL);
3879        iommu_device_unregister(&smmu->iommu);
3880        iommu_device_sysfs_remove(&smmu->iommu);
3881        arm_smmu_device_disable(smmu);
3882        iopf_queue_free(smmu->evtq.iopf);
3883
3884        return 0;
3885}
3886
3887static void arm_smmu_device_shutdown(struct platform_device *pdev)
3888{
3889        arm_smmu_device_remove(pdev);
3890}
3891
3892static const struct of_device_id arm_smmu_of_match[] = {
3893        { .compatible = "arm,smmu-v3", },
3894        { },
3895};
3896MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3897
3898static void arm_smmu_driver_unregister(struct platform_driver *drv)
3899{
3900        arm_smmu_sva_notifier_synchronize();
3901        platform_driver_unregister(drv);
3902}
3903
3904static struct platform_driver arm_smmu_driver = {
3905        .driver = {
3906                .name                   = "arm-smmu-v3",
3907                .of_match_table         = arm_smmu_of_match,
3908                .suppress_bind_attrs    = true,
3909        },
3910        .probe  = arm_smmu_device_probe,
3911        .remove = arm_smmu_device_remove,
3912        .shutdown = arm_smmu_device_shutdown,
3913};
3914module_driver(arm_smmu_driver, platform_driver_register,
3915              arm_smmu_driver_unregister);
3916
3917MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3918MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3919MODULE_ALIAS("platform:arm-smmu-v3");
3920MODULE_LICENSE("GPL v2");
3921