linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * IOMMU API for ARM architected SMMUv3 implementations.
   4 *
   5 * Copyright (C) 2015 ARM Limited
   6 *
   7 * Author: Will Deacon <will.deacon@arm.com>
   8 *
   9 * This driver is powered by bad coffee and bombay mix.
  10 */
  11
  12#include <linux/acpi.h>
  13#include <linux/acpi_iort.h>
  14#include <linux/bitops.h>
  15#include <linux/crash_dump.h>
  16#include <linux/delay.h>
  17#include <linux/dma-iommu.h>
  18#include <linux/err.h>
  19#include <linux/interrupt.h>
  20#include <linux/io-pgtable.h>
  21#include <linux/iopoll.h>
  22#include <linux/module.h>
  23#include <linux/msi.h>
  24#include <linux/of.h>
  25#include <linux/of_address.h>
  26#include <linux/of_platform.h>
  27#include <linux/pci.h>
  28#include <linux/pci-ats.h>
  29#include <linux/platform_device.h>
  30
  31#include <linux/amba/bus.h>
  32
  33#include "arm-smmu-v3.h"
  34#include "../../iommu-sva-lib.h"
  35
  36static bool disable_bypass = true;
  37module_param(disable_bypass, bool, 0444);
  38MODULE_PARM_DESC(disable_bypass,
  39        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
  40
  41static bool disable_msipolling;
  42module_param(disable_msipolling, bool, 0444);
  43MODULE_PARM_DESC(disable_msipolling,
  44        "Disable MSI-based polling for CMD_SYNC completion.");
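/*
 * Usage note (illustrative, assuming the module is built as arm_smmu_v3):
 * both parameters are read-only at runtime (mode 0444) and would normally
 * be set on the kernel command line, e.g.
 *
 *      arm_smmu_v3.disable_bypass=0 arm_smmu_v3.disable_msipolling=1
 */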
  45
  46enum arm_smmu_msi_index {
  47        EVTQ_MSI_INDEX,
  48        GERROR_MSI_INDEX,
  49        PRIQ_MSI_INDEX,
  50        ARM_SMMU_MAX_MSIS,
  51};
  52
  53static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
  54        [EVTQ_MSI_INDEX] = {
  55                ARM_SMMU_EVTQ_IRQ_CFG0,
  56                ARM_SMMU_EVTQ_IRQ_CFG1,
  57                ARM_SMMU_EVTQ_IRQ_CFG2,
  58        },
  59        [GERROR_MSI_INDEX] = {
  60                ARM_SMMU_GERROR_IRQ_CFG0,
  61                ARM_SMMU_GERROR_IRQ_CFG1,
  62                ARM_SMMU_GERROR_IRQ_CFG2,
  63        },
  64        [PRIQ_MSI_INDEX] = {
  65                ARM_SMMU_PRIQ_IRQ_CFG0,
  66                ARM_SMMU_PRIQ_IRQ_CFG1,
  67                ARM_SMMU_PRIQ_IRQ_CFG2,
  68        },
  69};
  70
  71struct arm_smmu_option_prop {
  72        u32 opt;
  73        const char *prop;
  74};
  75
  76DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
  77DEFINE_MUTEX(arm_smmu_asid_lock);
  78
  79/*
  80 * Special value used by SVA when a process dies, to quiesce a CD without
  81 * disabling it.
  82 */
  83struct arm_smmu_ctx_desc quiet_cd = { 0 };
  84
  85static struct arm_smmu_option_prop arm_smmu_options[] = {
  86        { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
  87        { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
  88        { 0, NULL},
  89};
  90
  91static void parse_driver_options(struct arm_smmu_device *smmu)
  92{
  93        int i = 0;
  94
  95        do {
  96                if (of_property_read_bool(smmu->dev->of_node,
  97                                                arm_smmu_options[i].prop)) {
  98                        smmu->options |= arm_smmu_options[i].opt;
  99                        dev_notice(smmu->dev, "option %s\n",
 100                                arm_smmu_options[i].prop);
 101                }
 102        } while (arm_smmu_options[++i].opt);
 103}
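/*
 * Descriptive note: the strings in arm_smmu_options[] are boolean firmware
 * properties looked up on the SMMU's device-tree node with
 * of_property_read_bool(); when present they simply set the corresponding
 * ARM_SMMU_OPT_* flag in smmu->options.
 */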
 104
 105/* Low-level queue manipulation functions */
 106static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
 107{
 108        u32 space, prod, cons;
 109
 110        prod = Q_IDX(q, q->prod);
 111        cons = Q_IDX(q, q->cons);
 112
 113        if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
 114                space = (1 << q->max_n_shift) - (prod - cons);
 115        else
 116                space = cons - prod;
 117
 118        return space >= n;
 119}
 120
 121static bool queue_full(struct arm_smmu_ll_queue *q)
 122{
 123        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 124               Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
 125}
 126
 127static bool queue_empty(struct arm_smmu_ll_queue *q)
 128{
 129        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 130               Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
 131}
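/*
 * Illustrative sketch (hypothetical helper, not used by the driver): prod
 * and cons carry a queue index in their low bits and a wrap bit directly
 * above it, which is how the helpers above tell "empty" from "full" when
 * the indices are equal. Written out for a queue of 1 << shift entries:
 */
static inline bool example_queue_full(u32 prod, u32 cons, u32 shift)
{
        u32 idx_mask = (1U << shift) - 1;
        u32 wrp_bit = 1U << shift;

        /* Same index, opposite wrap bits: the producer has lapped cons */
        return ((prod ^ cons) & idx_mask) == 0 &&
               ((prod ^ cons) & wrp_bit) != 0;
}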
 132
 133static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
 134{
 135        return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
 136                (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
 137               ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
 138                (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
 139}
 140
 141static void queue_sync_cons_out(struct arm_smmu_queue *q)
 142{
 143        /*
 144         * Ensure that all CPU accesses (reads and writes) to the queue
 145         * are complete before we update the cons pointer.
 146         */
 147        __iomb();
 148        writel_relaxed(q->llq.cons, q->cons_reg);
 149}
 150
 151static void queue_inc_cons(struct arm_smmu_ll_queue *q)
 152{
 153        u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
 154        q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
 155}
 156
 157static int queue_sync_prod_in(struct arm_smmu_queue *q)
 158{
 159        u32 prod;
 160        int ret = 0;
 161
 162        /*
 163         * We can't use the _relaxed() variant here, as we must prevent
 164         * speculative reads of the queue before we have determined that
 165         * prod has indeed moved.
 166         */
 167        prod = readl(q->prod_reg);
 168
 169        if (Q_OVF(prod) != Q_OVF(q->llq.prod))
 170                ret = -EOVERFLOW;
 171
 172        q->llq.prod = prod;
 173        return ret;
 174}
 175
 176static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
 177{
 178        u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
 179        return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
 180}
 181
 182static void queue_poll_init(struct arm_smmu_device *smmu,
 183                            struct arm_smmu_queue_poll *qp)
 184{
 185        qp->delay = 1;
 186        qp->spin_cnt = 0;
 187        qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
 188        qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 189}
 190
 191static int queue_poll(struct arm_smmu_queue_poll *qp)
 192{
 193        if (ktime_compare(ktime_get(), qp->timeout) > 0)
 194                return -ETIMEDOUT;
 195
 196        if (qp->wfe) {
 197                wfe();
 198        } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
 199                cpu_relax();
 200        } else {
 201                udelay(qp->delay);
 202                qp->delay *= 2;
 203                qp->spin_cnt = 0;
 204        }
 205
 206        return 0;
 207}
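/*
 * Illustrative usage sketch (hypothetical caller): a bounded wait built
 * from the two helpers above. queue_poll() spins briefly, then backs off
 * exponentially (or waits for an event when the SMMU supports SEV), and
 * returns -ETIMEDOUT once ARM_SMMU_POLL_TIMEOUT_US has elapsed:
 *
 *      struct arm_smmu_queue_poll qp;
 *      int ret = 0;
 *
 *      queue_poll_init(smmu, &qp);
 *      while (!condition_of_interest() && !ret)
 *              ret = queue_poll(&qp);
 */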
 208
 209static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
 210{
 211        int i;
 212
 213        for (i = 0; i < n_dwords; ++i)
 214                *dst++ = cpu_to_le64(*src++);
 215}
 216
 217static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
 218{
 219        int i;
 220
 221        for (i = 0; i < n_dwords; ++i)
 222                *dst++ = le64_to_cpu(*src++);
 223}
 224
 225static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 226{
 227        if (queue_empty(&q->llq))
 228                return -EAGAIN;
 229
 230        queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
 231        queue_inc_cons(&q->llq);
 232        queue_sync_cons_out(q);
 233        return 0;
 234}
 235
 236/* High-level queue accessors */
 237static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 238{
 239        memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
 240        cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
 241
 242        switch (ent->opcode) {
 243        case CMDQ_OP_TLBI_EL2_ALL:
 244        case CMDQ_OP_TLBI_NSNH_ALL:
 245                break;
 246        case CMDQ_OP_PREFETCH_CFG:
 247                cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
 248                break;
 249        case CMDQ_OP_CFGI_CD:
 250                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
 251                fallthrough;
 252        case CMDQ_OP_CFGI_STE:
 253                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
 254                cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
 255                break;
 256        case CMDQ_OP_CFGI_CD_ALL:
 257                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
 258                break;
 259        case CMDQ_OP_CFGI_ALL:
 260                /* Cover the entire SID range */
 261                cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
 262                break;
 263        case CMDQ_OP_TLBI_NH_VA:
 264                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 265                fallthrough;
 266        case CMDQ_OP_TLBI_EL2_VA:
 267                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
 268                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
 269                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 270                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 271                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
 272                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
 273                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
 274                break;
 275        case CMDQ_OP_TLBI_S2_IPA:
 276                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
 277                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
 278                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 279                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 280                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
 281                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
 282                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
 283                break;
 284        case CMDQ_OP_TLBI_NH_ASID:
 285                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 286                fallthrough;
 287        case CMDQ_OP_TLBI_S12_VMALL:
 288                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 289                break;
 290        case CMDQ_OP_TLBI_EL2_ASID:
 291                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 292                break;
 293        case CMDQ_OP_ATC_INV:
 294                cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
 295                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
 296                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
 297                cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
 298                cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
 299                cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
 300                break;
 301        case CMDQ_OP_PRI_RESP:
 302                cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
 303                cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
 304                cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
 305                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
 306                switch (ent->pri.resp) {
 307                case PRI_RESP_DENY:
 308                case PRI_RESP_FAIL:
 309                case PRI_RESP_SUCC:
 310                        break;
 311                default:
 312                        return -EINVAL;
 313                }
 314                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
 315                break;
 316        case CMDQ_OP_RESUME:
 317                cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
 318                cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
 319                cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
 320                break;
 321        case CMDQ_OP_CMD_SYNC:
 322                if (ent->sync.msiaddr) {
 323                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
 324                        cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 325                } else {
 326                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
 327                }
 328                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
 329                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
 330                break;
 331        default:
 332                return -ENOENT;
 333        }
 334
 335        return 0;
 336}
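/*
 * Illustrative sketch (hypothetical helper, not used by the driver): how a
 * caller encodes a single leaf TLBI-by-VA command with the builder above.
 * The ASID and address are made-up values.
 */
static void __maybe_unused example_build_tlbi_nh_va(u64 cmd[CMDQ_ENT_DWORDS])
{
        struct arm_smmu_cmdq_ent ent = {
                .opcode         = CMDQ_OP_TLBI_NH_VA,
                .tlbi = {
                        .asid   = 1,
                        .addr   = 0x40001000UL,
                        .leaf   = true,
                },
        };

        arm_smmu_cmdq_build_cmd(cmd, &ent);
}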
 337
 338static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
 339{
 340        return &smmu->cmdq;
 341}
 342
 343static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 344                                         struct arm_smmu_queue *q, u32 prod)
 345{
 346        struct arm_smmu_cmdq_ent ent = {
 347                .opcode = CMDQ_OP_CMD_SYNC,
 348        };
 349
 350        /*
 351         * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
 352         * payload, so the write will zero the entire command on that platform.
 353         */
 354        if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
 355                ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
 356                                   q->ent_dwords * 8;
 357        }
 358
 359        arm_smmu_cmdq_build_cmd(cmd, &ent);
 360}
 361
 362static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 363                                     struct arm_smmu_queue *q)
 364{
 365        static const char * const cerror_str[] = {
 366                [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
 367                [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
 368                [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
 369                [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
 370        };
 371
 372        int i;
 373        u64 cmd[CMDQ_ENT_DWORDS];
 374        u32 cons = readl_relaxed(q->cons_reg);
 375        u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
 376        struct arm_smmu_cmdq_ent cmd_sync = {
 377                .opcode = CMDQ_OP_CMD_SYNC,
 378        };
 379
 380        dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
 381                idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
 382
 383        switch (idx) {
 384        case CMDQ_ERR_CERROR_ABT_IDX:
 385                dev_err(smmu->dev, "retrying command fetch\n");
 386                return;
 387        case CMDQ_ERR_CERROR_NONE_IDX:
 388                return;
 389        case CMDQ_ERR_CERROR_ATC_INV_IDX:
 390                /*
 391                 * ATC Invalidation Completion timeout. CONS is still pointing
 392                 * at the CMD_SYNC. Attempt to complete other pending commands
 393                 * by repeating the CMD_SYNC, though we might well end up back
 394                 * here since the ATC invalidation may still be pending.
 395                 */
 396                return;
 397        case CMDQ_ERR_CERROR_ILL_IDX:
 398        default:
 399                break;
 400        }
 401
 402        /*
 403         * We may have concurrent producers, so we need to be careful
 404         * not to touch any of the shadow cmdq state.
 405         */
 406        queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
 407        dev_err(smmu->dev, "skipping command in error state:\n");
 408        for (i = 0; i < ARRAY_SIZE(cmd); ++i)
 409                dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
 410
 411        /* Convert the erroneous command into a CMD_SYNC */
 412        if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
 413                dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
 414                return;
 415        }
 416
 417        queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 418}
 419
 420static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 421{
 422        __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
 423}
 424
 425/*
 426 * Command queue locking.
 427 * This is a form of bastardised rwlock with the following major changes:
 428 *
 429 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 430 *   Neither have barrier semantics, and instead provide only a control
 431 *   dependency.
 432 *
 433 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 434 *   fails if the caller appears to be the last lock holder (yes, this is
 435 *   racy). All successful UNLOCK routines have RELEASE semantics.
 436 */
 437static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
 438{
 439        int val;
 440
 441        /*
 442         * We can try to avoid the cmpxchg() loop by simply incrementing the
 443         * lock counter. When held in exclusive state, the lock counter is set
 444         * to INT_MIN so these increments won't hurt as the value will remain
 445         * negative.
 446         */
 447        if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
 448                return;
 449
 450        do {
 451                val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
 452        } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
 453}
 454
 455static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
 456{
 457        (void)atomic_dec_return_release(&cmdq->lock);
 458}
 459
 460static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
 461{
 462        if (atomic_read(&cmdq->lock) == 1)
 463                return false;
 464
 465        arm_smmu_cmdq_shared_unlock(cmdq);
 466        return true;
 467}
 468
 469#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)            \
 470({                                                                      \
 471        bool __ret;                                                     \
 472        local_irq_save(flags);                                          \
 473        __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
 474        if (!__ret)                                                     \
 475                local_irq_restore(flags);                               \
 476        __ret;                                                          \
 477})
 478
 479#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)          \
 480({                                                                      \
 481        atomic_set_release(&cmdq->lock, 0);                             \
 482        local_irq_restore(flags);                                       \
 483})
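/*
 * Illustrative usage sketch: how the primitives above combine. A CMD_SYNC
 * inserter holds the lock shared across its wait, and the last shared
 * holder is allowed to update the cached cons pointer, as
 * arm_smmu_cmdq_issue_cmdlist() does:
 *
 *      arm_smmu_cmdq_shared_lock(cmdq);
 *      ... wait for the CMD_SYNC to be consumed ...
 *      if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *              WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *              arm_smmu_cmdq_shared_unlock(cmdq);
 *      }
 *
 * The exclusive trylock is taken opportunistically, e.g. to re-read the
 * hardware cons register when the queue appears full.
 */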
 484
 485
 486/*
 487 * Command queue insertion.
 488 * This is made fiddly by our attempts to achieve some sort of scalability
 489 * since there is one queue shared amongst all of the CPUs in the system.  If
 490 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 491 * then you'll *love* this monstrosity.
 492 *
 493 * The basic idea is to split the queue up into ranges of commands that are
 494 * owned by a given CPU; the owner may not have written all of the commands
 495 * itself, but is responsible for advancing the hardware prod pointer when
 496 * the time comes. The algorithm is roughly:
 497 *
 498 *      1. Allocate some space in the queue. At this point we also discover
 499 *         whether the head of the queue is currently owned by another CPU,
 500 *         or whether we are the owner.
 501 *
 502 *      2. Write our commands into our allocated slots in the queue.
 503 *
 504 *      3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 505 *
 506 *      4. If we are an owner:
 507 *              a. Wait for the previous owner to finish.
 508 *              b. Mark the queue head as unowned, which tells us the range
 509 *                 that we are responsible for publishing.
 510 *              c. Wait for all commands in our owned range to become valid.
 511 *              d. Advance the hardware prod pointer.
 512 *              e. Tell the next owner we've finished.
 513 *
 514 *      5. If we are inserting a CMD_SYNC (we may or may not have been an
 515 *         owner), then we need to stick around until it has completed:
 516 *              a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 517 *                 to clear the first 4 bytes.
 518 *              b. Otherwise, we spin waiting for the hardware cons pointer to
 519 *                 advance past our command.
 520 *
 521 * The devil is in the details, particularly the use of locking for handling
 522 * SYNC completion and freeing up space in the queue before we think that it is
 523 * full.
 524 */
 525static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
 526                                               u32 sprod, u32 eprod, bool set)
 527{
 528        u32 swidx, sbidx, ewidx, ebidx;
 529        struct arm_smmu_ll_queue llq = {
 530                .max_n_shift    = cmdq->q.llq.max_n_shift,
 531                .prod           = sprod,
 532        };
 533
 534        ewidx = BIT_WORD(Q_IDX(&llq, eprod));
 535        ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
 536
 537        while (llq.prod != eprod) {
 538                unsigned long mask;
 539                atomic_long_t *ptr;
 540                u32 limit = BITS_PER_LONG;
 541
 542                swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
 543                sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
 544
 545                ptr = &cmdq->valid_map[swidx];
 546
 547                if ((swidx == ewidx) && (sbidx < ebidx))
 548                        limit = ebidx;
 549
 550                mask = GENMASK(limit - 1, sbidx);
 551
 552                /*
 553                 * The valid bit is the inverse of the wrap bit. This means
 554                 * that a zero-initialised queue is invalid and, after marking
 555                 * all entries as valid, they become invalid again when we
 556                 * wrap.
 557                 */
 558                if (set) {
 559                        atomic_long_xor(mask, ptr);
 560                } else { /* Poll */
 561                        unsigned long valid;
 562
 563                        valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
 564                        atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
 565                }
 566
 567                llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
 568        }
 569}
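/*
 * Descriptive note: on the first lap of the queue the wrap bit of prod is
 * clear, so "valid" above evaluates to the mask and producers XOR their
 * bits from 0 to 1. After a wrap the polarity flips: "valid" evaluates to
 * zero and the same XOR takes the bits back from 1 to 0, which is what
 * (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) expresses in the polling branch.
 */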
 570
 571/* Mark all entries in the range [sprod, eprod) as valid */
 572static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
 573                                        u32 sprod, u32 eprod)
 574{
 575        __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
 576}
 577
 578/* Wait for all entries in the range [sprod, eprod) to become valid */
 579static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
 580                                         u32 sprod, u32 eprod)
 581{
 582        __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
 583}
 584
 585/* Wait for the command queue to become non-full */
 586static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
 587                                             struct arm_smmu_ll_queue *llq)
 588{
 589        unsigned long flags;
 590        struct arm_smmu_queue_poll qp;
 591        struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 592        int ret = 0;
 593
 594        /*
 595         * Try to update our copy of cons by grabbing exclusive cmdq access. If
 596         * that fails, spin until somebody else updates it for us.
 597         */
 598        if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 599                WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 600                arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 601                llq->val = READ_ONCE(cmdq->q.llq.val);
 602                return 0;
 603        }
 604
 605        queue_poll_init(smmu, &qp);
 606        do {
 607                llq->val = READ_ONCE(cmdq->q.llq.val);
 608                if (!queue_full(llq))
 609                        break;
 610
 611                ret = queue_poll(&qp);
 612        } while (!ret);
 613
 614        return ret;
 615}
 616
 617/*
 618 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 619 * Must be called with the cmdq lock held in some capacity.
 620 */
 621static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
 622                                          struct arm_smmu_ll_queue *llq)
 623{
 624        int ret = 0;
 625        struct arm_smmu_queue_poll qp;
 626        struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 627        u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
 628
 629        queue_poll_init(smmu, &qp);
 630
 631        /*
 632         * The MSI won't generate an event, since it's being written back
 633         * into the command queue.
 634         */
 635        qp.wfe = false;
 636        smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
 637        llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
 638        return ret;
 639}
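/*
 * Descriptive note: completion is detected by watching the first 32 bits
 * of the CMD_SYNC entry itself. arm_smmu_cmdq_build_sync_cmd() pointed the
 * MSI at that queue slot and left the MSI data as zero, so the SMMU's
 * write-back clears the word and smp_cond_load_relaxed() returns once
 * *cmd reads 0.
 */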
 640
 641/*
 642 * Wait until the SMMU cons index passes llq->prod.
 643 * Must be called with the cmdq lock held in some capacity.
 644 */
 645static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 646                                               struct arm_smmu_ll_queue *llq)
 647{
 648        struct arm_smmu_queue_poll qp;
 649        struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 650        u32 prod = llq->prod;
 651        int ret = 0;
 652
 653        queue_poll_init(smmu, &qp);
 654        llq->val = READ_ONCE(cmdq->q.llq.val);
 655        do {
 656                if (queue_consumed(llq, prod))
 657                        break;
 658
 659                ret = queue_poll(&qp);
 660
 661                /*
 662                 * This needs to be a readl() so that our subsequent call
 663                 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
 664                 *
 665                 * Specifically, we need to ensure that we observe all
 666                 * shared_lock()s by other CMD_SYNCs that share our owner,
 667                 * so that a failing call to tryunlock() means that we're
 668                 * the last one out and therefore we can safely advance
 669                 * cmdq->q.llq.cons. Roughly speaking:
 670                 *
 671                 * CPU 0                CPU1                    CPU2 (us)
 672                 *
 673                 * if (sync)
 674                 *      shared_lock();
 675                 *
 676                 * dma_wmb();
 677                 * set_valid_map();
 678                 *
 679                 *                      if (owner) {
 680                 *                              poll_valid_map();
 681                 *                              <control dependency>
 682                 *                              writel(prod_reg);
 683                 *
 684                 *                                              readl(cons_reg);
 685                 *                                              tryunlock();
 686                 *
 687                 * Requires us to see CPU 0's shared_lock() acquisition.
 688                 */
 689                llq->cons = readl(cmdq->q.cons_reg);
 690        } while (!ret);
 691
 692        return ret;
 693}
 694
 695static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 696                                         struct arm_smmu_ll_queue *llq)
 697{
 698        if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
 699                return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
 700
 701        return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
 702}
 703
 704static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
 705                                        u32 prod, int n)
 706{
 707        int i;
 708        struct arm_smmu_ll_queue llq = {
 709                .max_n_shift    = cmdq->q.llq.max_n_shift,
 710                .prod           = prod,
 711        };
 712
 713        for (i = 0; i < n; ++i) {
 714                u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
 715
 716                prod = queue_inc_prod_n(&llq, i);
 717                queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
 718        }
 719}
 720
 721/*
 722 * This is the actual insertion function, and provides the following
 723 * ordering guarantees to callers:
 724 *
 725 * - There is a dma_wmb() before publishing any commands to the queue.
 726 *   This can be relied upon to order prior writes to data structures
 727 *   in memory (such as a CD or an STE) before the command.
 728 *
 729 * - On completion of a CMD_SYNC, there is a control dependency.
 730 *   This can be relied upon to order subsequent writes to memory (e.g.
 731 *   freeing an IOVA) after completion of the CMD_SYNC.
 732 *
 733 * - Command insertion is totally ordered, so if two CPUs each race to
 734 *   insert their own list of commands then all of the commands from one
 735 *   CPU will appear before any of the commands from the other CPU.
 736 */
 737static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 738                                       u64 *cmds, int n, bool sync)
 739{
 740        u64 cmd_sync[CMDQ_ENT_DWORDS];
 741        u32 prod;
 742        unsigned long flags;
 743        bool owner;
 744        struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 745        struct arm_smmu_ll_queue llq, head;
 746        int ret = 0;
 747
 748        llq.max_n_shift = cmdq->q.llq.max_n_shift;
 749
 750        /* 1. Allocate some space in the queue */
 751        local_irq_save(flags);
 752        llq.val = READ_ONCE(cmdq->q.llq.val);
 753        do {
 754                u64 old;
 755
 756                while (!queue_has_space(&llq, n + sync)) {
 757                        local_irq_restore(flags);
 758                        if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
 759                                dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
 760                        local_irq_save(flags);
 761                }
 762
 763                head.cons = llq.cons;
 764                head.prod = queue_inc_prod_n(&llq, n + sync) |
 765                                             CMDQ_PROD_OWNED_FLAG;
 766
 767                old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
 768                if (old == llq.val)
 769                        break;
 770
 771                llq.val = old;
 772        } while (1);
 773        owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
 774        head.prod &= ~CMDQ_PROD_OWNED_FLAG;
 775        llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
 776
 777        /*
 778         * 2. Write our commands into the queue
 779         * Dependency ordering from the cmpxchg() loop above.
 780         */
 781        arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
 782        if (sync) {
 783                prod = queue_inc_prod_n(&llq, n);
 784                arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
 785                queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
 786
 787                /*
 788                 * In order to determine completion of our CMD_SYNC, we must
 789                 * ensure that the queue can't wrap twice without us noticing.
 790                 * We achieve that by taking the cmdq lock as shared before
 791                 * marking our slot as valid.
 792                 */
 793                arm_smmu_cmdq_shared_lock(cmdq);
 794        }
 795
 796        /* 3. Mark our slots as valid, ensuring commands are visible first */
 797        dma_wmb();
 798        arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
 799
 800        /* 4. If we are the owner, take control of the SMMU hardware */
 801        if (owner) {
 802                /* a. Wait for previous owner to finish */
 803                atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
 804
 805                /* b. Stop gathering work by clearing the owned flag */
 806                prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
 807                                                   &cmdq->q.llq.atomic.prod);
 808                prod &= ~CMDQ_PROD_OWNED_FLAG;
 809
 810                /*
 811                 * c. Wait for any gathered work to be written to the queue.
 812                 * Note that we read our own entries so that we have the control
 813                 * dependency required by (d).
 814                 */
 815                arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
 816
 817                /*
 818                 * d. Advance the hardware prod pointer
 819                 * Control dependency ordering from the entries becoming valid.
 820                 */
 821                writel_relaxed(prod, cmdq->q.prod_reg);
 822
 823                /*
 824                 * e. Tell the next owner we're done
 825                 * Make sure we've updated the hardware first, so that we don't
 826                 * race to update prod and potentially move it backwards.
 827                 */
 828                atomic_set_release(&cmdq->owner_prod, prod);
 829        }
 830
 831        /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
 832        if (sync) {
 833                llq.prod = queue_inc_prod_n(&llq, n);
 834                ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
 835                if (ret) {
 836                        dev_err_ratelimited(smmu->dev,
 837                                            "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
 838                                            llq.prod,
 839                                            readl_relaxed(cmdq->q.prod_reg),
 840                                            readl_relaxed(cmdq->q.cons_reg));
 841                }
 842
 843                /*
 844                 * Try to unlock the cmdq lock. This will fail if we're the last
 845                 * reader, in which case we can safely update cmdq->q.llq.cons
 846                 */
 847                if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 848                        WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 849                        arm_smmu_cmdq_shared_unlock(cmdq);
 850                }
 851        }
 852
 853        local_irq_restore(flags);
 854        return ret;
 855}
 856
 857static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 858                                     struct arm_smmu_cmdq_ent *ent,
 859                                     bool sync)
 860{
 861        u64 cmd[CMDQ_ENT_DWORDS];
 862
 863        if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
 864                dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
 865                         ent->opcode);
 866                return -EINVAL;
 867        }
 868
 869        return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
 870}
 871
 872static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 873                                   struct arm_smmu_cmdq_ent *ent)
 874{
 875        return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
 876}
 877
 878static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
 879                                             struct arm_smmu_cmdq_ent *ent)
 880{
 881        return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
 882}
 883
 884static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
 885                                    struct arm_smmu_cmdq_batch *cmds,
 886                                    struct arm_smmu_cmdq_ent *cmd)
 887{
 888        if (cmds->num == CMDQ_BATCH_ENTRIES) {
 889                arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
 890                cmds->num = 0;
 891        }
 892        arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
 893        cmds->num++;
 894}
 895
 896static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
 897                                      struct arm_smmu_cmdq_batch *cmds)
 898{
 899        return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
 900}
 901
 902static int arm_smmu_page_response(struct device *dev,
 903                                  struct iommu_fault_event *unused,
 904                                  struct iommu_page_response *resp)
 905{
 906        struct arm_smmu_cmdq_ent cmd = {0};
 907        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
 908        int sid = master->streams[0].id;
 909
 910        if (master->stall_enabled) {
 911                cmd.opcode              = CMDQ_OP_RESUME;
 912                cmd.resume.sid          = sid;
 913                cmd.resume.stag         = resp->grpid;
 914                switch (resp->code) {
 915                case IOMMU_PAGE_RESP_INVALID:
 916                case IOMMU_PAGE_RESP_FAILURE:
 917                        cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
 918                        break;
 919                case IOMMU_PAGE_RESP_SUCCESS:
 920                        cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
 921                        break;
 922                default:
 923                        return -EINVAL;
 924                }
 925        } else {
 926                return -ENODEV;
 927        }
 928
 929        arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
 930        /*
 931         * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
 932         * RESUME consumption guarantees that the stalled transaction will be
 933         * terminated... at some point in the future. PRI_RESP is fire and
 934         * forget.
 935         */
 936
 937        return 0;
 938}
 939
 940/* Context descriptor manipulation functions */
 941void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 942{
 943        struct arm_smmu_cmdq_ent cmd = {
 944                .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
 945                        CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
 946                .tlbi.asid = asid,
 947        };
 948
 949        arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 950}
 951
 952static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 953                             int ssid, bool leaf)
 954{
 955        size_t i;
 956        unsigned long flags;
 957        struct arm_smmu_master *master;
 958        struct arm_smmu_cmdq_batch cmds;
 959        struct arm_smmu_device *smmu = smmu_domain->smmu;
 960        struct arm_smmu_cmdq_ent cmd = {
 961                .opcode = CMDQ_OP_CFGI_CD,
 962                .cfgi   = {
 963                        .ssid   = ssid,
 964                        .leaf   = leaf,
 965                },
 966        };
 967
 968        cmds.num = 0;
 969
 970        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
 971        list_for_each_entry(master, &smmu_domain->devices, domain_head) {
 972                for (i = 0; i < master->num_streams; i++) {
 973                        cmd.cfgi.sid = master->streams[i].id;
 974                        arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 975                }
 976        }
 977        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 978
 979        arm_smmu_cmdq_batch_submit(smmu, &cmds);
 980}
 981
 982static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
 983                                        struct arm_smmu_l1_ctx_desc *l1_desc)
 984{
 985        size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
 986
 987        l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
 988                                             &l1_desc->l2ptr_dma, GFP_KERNEL);
 989        if (!l1_desc->l2ptr) {
 990                dev_warn(smmu->dev,
 991                         "failed to allocate context descriptor table\n");
 992                return -ENOMEM;
 993        }
 994        return 0;
 995}
 996
 997static void arm_smmu_write_cd_l1_desc(__le64 *dst,
 998                                      struct arm_smmu_l1_ctx_desc *l1_desc)
 999{
1000        u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1001                  CTXDESC_L1_DESC_V;
1002
1003        /* See comment in arm_smmu_write_ctx_desc() */
1004        WRITE_ONCE(*dst, cpu_to_le64(val));
1005}
1006
1007static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1008                                   u32 ssid)
1009{
1010        __le64 *l1ptr;
1011        unsigned int idx;
1012        struct arm_smmu_l1_ctx_desc *l1_desc;
1013        struct arm_smmu_device *smmu = smmu_domain->smmu;
1014        struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1015
1016        if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1017                return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1018
1019        idx = ssid >> CTXDESC_SPLIT;
1020        l1_desc = &cdcfg->l1_desc[idx];
1021        if (!l1_desc->l2ptr) {
1022                if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1023                        return NULL;
1024
1025                l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1026                arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1027                /* An invalid L1CD can be cached */
1028                arm_smmu_sync_cd(smmu_domain, ssid, false);
1029        }
1030        idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1031        return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1032}
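/*
 * Illustrative sketch (hypothetical helper, not used by the driver): the
 * SubstreamID split performed above for the two-level format. The low
 * CTXDESC_SPLIT bits index the L2 leaf table, the remaining upper bits
 * pick the L1 descriptor:
 */
static void __maybe_unused example_split_ssid(u32 ssid, u32 *l1_idx,
                                              u32 *l2_idx)
{
        *l1_idx = ssid >> CTXDESC_SPLIT;
        *l2_idx = ssid & (CTXDESC_L2_ENTRIES - 1);
}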
1033
1034int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1035                            struct arm_smmu_ctx_desc *cd)
1036{
1037        /*
1038         * This function handles the following cases:
1039         *
1040         * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1041         * (2) Install a secondary CD, for SID+SSID traffic.
1042         * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1043         *     CD, then invalidate the old entry and mappings.
1044         * (4) Quiesce the context without clearing the valid bit. Disable
1045         *     translation, and ignore any translation fault.
1046         * (5) Remove a secondary CD.
1047         */
1048        u64 val;
1049        bool cd_live;
1050        __le64 *cdptr;
1051
1052        if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1053                return -E2BIG;
1054
1055        cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1056        if (!cdptr)
1057                return -ENOMEM;
1058
1059        val = le64_to_cpu(cdptr[0]);
1060        cd_live = !!(val & CTXDESC_CD_0_V);
1061
1062        if (!cd) { /* (5) */
1063                val = 0;
1064        } else if (cd == &quiet_cd) { /* (4) */
1065                val |= CTXDESC_CD_0_TCR_EPD0;
1066        } else if (cd_live) { /* (3) */
1067                val &= ~CTXDESC_CD_0_ASID;
1068                val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1069                /*
1070                 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1071                 * this substream's traffic
1072                 */
1073        } else { /* (1) and (2) */
1074                cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1075                cdptr[2] = 0;
1076                cdptr[3] = cpu_to_le64(cd->mair);
1077
1078                /*
1079                 * STE is live, and the SMMU might read dwords of this CD in any
1080                 * order. Ensure that it observes valid values before reading
1081                 * V=1.
1082                 */
1083                arm_smmu_sync_cd(smmu_domain, ssid, true);
1084
1085                val = cd->tcr |
1086#ifdef __BIG_ENDIAN
1087                        CTXDESC_CD_0_ENDI |
1088#endif
1089                        CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1090                        (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1091                        CTXDESC_CD_0_AA64 |
1092                        FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1093                        CTXDESC_CD_0_V;
1094
1095                if (smmu_domain->stall_enabled)
1096                        val |= CTXDESC_CD_0_S;
1097        }
1098
1099        /*
1100         * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1101         * "Configuration structures and configuration invalidation completion"
1102         *
1103         *   The size of single-copy atomic reads made by the SMMU is
1104         *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1105         *   field within an aligned 64-bit span of a structure can be altered
1106         *   without first making the structure invalid.
1107         */
1108        WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1109        arm_smmu_sync_cd(smmu_domain, ssid, true);
1110        return 0;
1111}
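/*
 * Illustrative usage sketch (hypothetical callers): the five cases listed
 * at the top of arm_smmu_write_ctx_desc() map onto the @cd argument:
 *
 *      arm_smmu_write_ctx_desc(smmu_domain, ssid, cd);         cases (1)-(3)
 *      arm_smmu_write_ctx_desc(smmu_domain, ssid, &quiet_cd);  case (4)
 *      arm_smmu_write_ctx_desc(smmu_domain, ssid, NULL);       case (5)
 */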
1112
1113static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1114{
1115        int ret;
1116        size_t l1size;
1117        size_t max_contexts;
1118        struct arm_smmu_device *smmu = smmu_domain->smmu;
1119        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1120        struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1121
1122        max_contexts = 1 << cfg->s1cdmax;
1123
1124        if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1125            max_contexts <= CTXDESC_L2_ENTRIES) {
1126                cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1127                cdcfg->num_l1_ents = max_contexts;
1128
1129                l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1130        } else {
1131                cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1132                cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1133                                                  CTXDESC_L2_ENTRIES);
1134
1135                cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1136                                              sizeof(*cdcfg->l1_desc),
1137                                              GFP_KERNEL);
1138                if (!cdcfg->l1_desc)
1139                        return -ENOMEM;
1140
1141                l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1142        }
1143
1144        cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1145                                           GFP_KERNEL);
1146        if (!cdcfg->cdtab) {
1147                dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1148                ret = -ENOMEM;
1149                goto err_free_l1;
1150        }
1151
1152        return 0;
1153
1154err_free_l1:
1155        if (cdcfg->l1_desc) {
1156                devm_kfree(smmu->dev, cdcfg->l1_desc);
1157                cdcfg->l1_desc = NULL;
1158        }
1159        return ret;
1160}
1161
1162static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1163{
1164        int i;
1165        size_t size, l1size;
1166        struct arm_smmu_device *smmu = smmu_domain->smmu;
1167        struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1168
1169        if (cdcfg->l1_desc) {
1170                size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1171
1172                for (i = 0; i < cdcfg->num_l1_ents; i++) {
1173                        if (!cdcfg->l1_desc[i].l2ptr)
1174                                continue;
1175
1176                        dmam_free_coherent(smmu->dev, size,
1177                                           cdcfg->l1_desc[i].l2ptr,
1178                                           cdcfg->l1_desc[i].l2ptr_dma);
1179                }
1180                devm_kfree(smmu->dev, cdcfg->l1_desc);
1181                cdcfg->l1_desc = NULL;
1182
1183                l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1184        } else {
1185                l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1186        }
1187
1188        dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1189        cdcfg->cdtab_dma = 0;
1190        cdcfg->cdtab = NULL;
1191}
1192
1193bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1194{
1195        bool free;
1196        struct arm_smmu_ctx_desc *old_cd;
1197
1198        if (!cd->asid)
1199                return false;
1200
1201        free = refcount_dec_and_test(&cd->refs);
1202        if (free) {
1203                old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1204                WARN_ON(old_cd != cd);
1205        }
1206        return free;
1207}
1208
1209/* Stream table manipulation functions */
1210static void
1211arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1212{
1213        u64 val = 0;
1214
1215        val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1216        val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1217
1218        /* See comment in arm_smmu_write_ctx_desc() */
1219        WRITE_ONCE(*dst, cpu_to_le64(val));
1220}
1221
1222static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1223{
1224        struct arm_smmu_cmdq_ent cmd = {
1225                .opcode = CMDQ_OP_CFGI_STE,
1226                .cfgi   = {
1227                        .sid    = sid,
1228                        .leaf   = true,
1229                },
1230        };
1231
1232        arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1233}
1234
1235static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1236                                      __le64 *dst)
1237{
1238        /*
1239         * This is hideously complicated, but we only really care about
1240         * three cases at the moment:
1241         *
1242         * 1. Invalid (all zero) -> bypass/fault (init)
1243         * 2. Bypass/fault -> translation/bypass (attach)
1244         * 3. Translation/bypass -> bypass/fault (detach)
1245         *
1246         * Given that we can't update the STE atomically and the SMMU
1247         * doesn't read the thing in a defined order, that leaves us
1248         * with the following maintenance requirements:
1249         *
1250         * 1. Update Config, return (init time STEs aren't live)
1251         * 2. Write everything apart from dword 0, sync, write dword 0, sync
1252         * 3. Update Config, sync
1253         */
1254        u64 val = le64_to_cpu(dst[0]);
1255        bool ste_live = false;
1256        struct arm_smmu_device *smmu = NULL;
1257        struct arm_smmu_s1_cfg *s1_cfg = NULL;
1258        struct arm_smmu_s2_cfg *s2_cfg = NULL;
1259        struct arm_smmu_domain *smmu_domain = NULL;
1260        struct arm_smmu_cmdq_ent prefetch_cmd = {
1261                .opcode         = CMDQ_OP_PREFETCH_CFG,
1262                .prefetch       = {
1263                        .sid    = sid,
1264                },
1265        };
1266
1267        if (master) {
1268                smmu_domain = master->domain;
1269                smmu = master->smmu;
1270        }
1271
1272        if (smmu_domain) {
1273                switch (smmu_domain->stage) {
1274                case ARM_SMMU_DOMAIN_S1:
1275                        s1_cfg = &smmu_domain->s1_cfg;
1276                        break;
1277                case ARM_SMMU_DOMAIN_S2:
1278                case ARM_SMMU_DOMAIN_NESTED:
1279                        s2_cfg = &smmu_domain->s2_cfg;
1280                        break;
1281                default:
1282                        break;
1283                }
1284        }
1285
1286        if (val & STRTAB_STE_0_V) {
1287                switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1288                case STRTAB_STE_0_CFG_BYPASS:
1289                        break;
1290                case STRTAB_STE_0_CFG_S1_TRANS:
1291                case STRTAB_STE_0_CFG_S2_TRANS:
1292                        ste_live = true;
1293                        break;
1294                case STRTAB_STE_0_CFG_ABORT:
1295                        BUG_ON(!disable_bypass);
1296                        break;
1297                default:
1298                        BUG(); /* STE corruption */
1299                }
1300        }
1301
1302        /* Nuke the existing STE_0 value, as we're going to rewrite it */
1303        val = STRTAB_STE_0_V;
1304
1305        /* Bypass/fault */
1306        if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1307                if (!smmu_domain && disable_bypass)
1308                        val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1309                else
1310                        val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1311
1312                dst[0] = cpu_to_le64(val);
1313                dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1314                                                STRTAB_STE_1_SHCFG_INCOMING));
1315                dst[2] = 0; /* Nuke the VMID */
1316                /*
1317                 * The SMMU can perform negative caching, so we must sync
1318                 * the STE regardless of whether the old value was live.
1319                 */
1320                if (smmu)
1321                        arm_smmu_sync_ste_for_sid(smmu, sid);
1322                return;
1323        }
1324
1325        if (s1_cfg) {
1326                u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1327                        STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1328
1329                BUG_ON(ste_live);
1330                dst[1] = cpu_to_le64(
1331                         FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1332                         FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1333                         FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1334                         FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1335                         FIELD_PREP(STRTAB_STE_1_STRW, strw));
1336
1337                if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1338                    !master->stall_enabled)
1339                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1340
1341                val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1342                        FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1343                        FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1344                        FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1345        }
1346
1347        if (s2_cfg) {
1348                BUG_ON(ste_live);
1349                dst[2] = cpu_to_le64(
1350                         FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1351                         FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1352#ifdef __BIG_ENDIAN
1353                         STRTAB_STE_2_S2ENDI |
1354#endif
1355                         STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1356                         STRTAB_STE_2_S2R);
1357
1358                dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1359
1360                val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1361        }
1362
1363        if (master->ats_enabled)
1364                dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1365                                                 STRTAB_STE_1_EATS_TRANS));
1366
1367        arm_smmu_sync_ste_for_sid(smmu, sid);
1368        /* See comment in arm_smmu_write_ctx_desc() */
1369        WRITE_ONCE(dst[0], cpu_to_le64(val));
1370        arm_smmu_sync_ste_for_sid(smmu, sid);
1371
1372        /* It's likely that we'll want to use the new STE soon */
1373        if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1374                arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1375}
1376
1377static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1378{
1379        unsigned int i;
1380
1381        for (i = 0; i < nent; ++i) {
1382                arm_smmu_write_strtab_ent(NULL, -1, strtab);
1383                strtab += STRTAB_STE_DWORDS;
1384        }
1385}
1386
1387static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1388{
1389        size_t size;
1390        void *strtab;
1391        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1392        struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1393
1394        if (desc->l2ptr)
1395                return 0;
1396
1397        size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1398        strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1399
1400        desc->span = STRTAB_SPLIT + 1;
1401        desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1402                                          GFP_KERNEL);
1403        if (!desc->l2ptr) {
1404                dev_err(smmu->dev,
1405                        "failed to allocate l2 stream table for SID %u\n",
1406                        sid);
1407                return -ENOMEM;
1408        }
1409
1410        arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1411        arm_smmu_write_strtab_l1_desc(strtab, desc);
1412        return 0;
1413}
1414
1415static struct arm_smmu_master *
1416arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1417{
1418        struct rb_node *node;
1419        struct arm_smmu_stream *stream;
1420
1421        lockdep_assert_held(&smmu->streams_mutex);
1422
1423        node = smmu->streams.rb_node;
1424        while (node) {
1425                stream = rb_entry(node, struct arm_smmu_stream, node);
1426                if (stream->id < sid)
1427                        node = node->rb_right;
1428                else if (stream->id > sid)
1429                        node = node->rb_left;
1430                else
1431                        return stream->master;
1432        }
1433
1434        return NULL;
1435}
1436
1437/* IRQ and event handlers */
1438static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1439{
1440        int ret;
1441        u32 reason;
1442        u32 perm = 0;
1443        struct arm_smmu_master *master;
1444        bool ssid_valid = evt[0] & EVTQ_0_SSV;
1445        u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1446        struct iommu_fault_event fault_evt = { };
1447        struct iommu_fault *flt = &fault_evt.fault;
1448
1449        switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1450        case EVT_ID_TRANSLATION_FAULT:
1451                reason = IOMMU_FAULT_REASON_PTE_FETCH;
1452                break;
1453        case EVT_ID_ADDR_SIZE_FAULT:
1454                reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1455                break;
1456        case EVT_ID_ACCESS_FAULT:
1457                reason = IOMMU_FAULT_REASON_ACCESS;
1458                break;
1459        case EVT_ID_PERMISSION_FAULT:
1460                reason = IOMMU_FAULT_REASON_PERMISSION;
1461                break;
1462        default:
1463                return -EOPNOTSUPP;
1464        }
1465
1466        /* Stage-2 is always pinned at the moment */
1467        if (evt[1] & EVTQ_1_S2)
1468                return -EFAULT;
1469
1470        if (evt[1] & EVTQ_1_RnW)
1471                perm |= IOMMU_FAULT_PERM_READ;
1472        else
1473                perm |= IOMMU_FAULT_PERM_WRITE;
1474
1475        if (evt[1] & EVTQ_1_InD)
1476                perm |= IOMMU_FAULT_PERM_EXEC;
1477
1478        if (evt[1] & EVTQ_1_PnU)
1479                perm |= IOMMU_FAULT_PERM_PRIV;
1480
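        /*
         * Stalled transactions are reported as recoverable page requests so
         * that a handler can resolve the fault and resume the device with a
         * page response; anything else is reported as an unrecoverable DMA
         * fault.
         */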
1481        if (evt[1] & EVTQ_1_STALL) {
1482                flt->type = IOMMU_FAULT_PAGE_REQ;
1483                flt->prm = (struct iommu_fault_page_request) {
1484                        .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1485                        .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1486                        .perm = perm,
1487                        .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1488                };
1489
1490                if (ssid_valid) {
1491                        flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1492                        flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1493                }
1494        } else {
1495                flt->type = IOMMU_FAULT_DMA_UNRECOV;
1496                flt->event = (struct iommu_fault_unrecoverable) {
1497                        .reason = reason,
1498                        .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1499                        .perm = perm,
1500                        .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1501                };
1502
1503                if (ssid_valid) {
1504                        flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1505                        flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1506                }
1507        }
1508
1509        mutex_lock(&smmu->streams_mutex);
1510        master = arm_smmu_find_master(smmu, sid);
1511        if (!master) {
1512                ret = -EINVAL;
1513                goto out_unlock;
1514        }
1515
1516        ret = iommu_report_device_fault(master->dev, &fault_evt);
1517        if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1518                /* Nobody cared, abort the access */
1519                struct iommu_page_response resp = {
1520                        .pasid          = flt->prm.pasid,
1521                        .grpid          = flt->prm.grpid,
1522                        .code           = IOMMU_PAGE_RESP_FAILURE,
1523                };
1524                arm_smmu_page_response(master->dev, &fault_evt, &resp);
1525        }
1526
1527out_unlock:
1528        mutex_unlock(&smmu->streams_mutex);
1529        return ret;
1530}
1531
1532static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1533{
1534        int i, ret;
1535        struct arm_smmu_device *smmu = dev;
1536        struct arm_smmu_queue *q = &smmu->evtq.q;
1537        struct arm_smmu_ll_queue *llq = &q->llq;
1538        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1539                                      DEFAULT_RATELIMIT_BURST);
1540        u64 evt[EVTQ_ENT_DWORDS];
1541
1542        do {
1543                while (!queue_remove_raw(q, evt)) {
1544                        u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1545
1546                        ret = arm_smmu_handle_evt(smmu, evt);
1547                        if (!ret || !__ratelimit(&rs))
1548                                continue;
1549
1550                        dev_info(smmu->dev, "event 0x%02x received:\n", id);
1551                        for (i = 0; i < ARRAY_SIZE(evt); ++i)
1552                                dev_info(smmu->dev, "\t0x%016llx\n",
1553                                         (unsigned long long)evt[i]);
1554
1555                }
1556
1557                /*
1558                 * Not much we can do on overflow, so scream and pretend we're
1559                 * trying harder.
1560                 */
1561                if (queue_sync_prod_in(q) == -EOVERFLOW)
1562                        dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1563        } while (!queue_empty(llq));
1564
1565        /* Sync our overflow flag, as we believe we're up to speed */
1566        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1567                    Q_IDX(llq, llq->cons);
1568        return IRQ_HANDLED;
1569}
1570
1571static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1572{
1573        u32 sid, ssid;
1574        u16 grpid;
1575        bool ssv, last;
1576
1577        sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1578        ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1579        ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1580        last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1581        grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1582
1583        dev_info(smmu->dev, "unexpected PRI request received:\n");
1584        dev_info(smmu->dev,
1585                 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1586                 sid, ssid, grpid, last ? "L" : "",
1587                 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1588                 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1589                 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1590                 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1591                 evt[1] & PRIQ_1_ADDR_MASK);
1592
1593        if (last) {
1594                struct arm_smmu_cmdq_ent cmd = {
1595                        .opcode                 = CMDQ_OP_PRI_RESP,
1596                        .substream_valid        = ssv,
1597                        .pri                    = {
1598                                .sid    = sid,
1599                                .ssid   = ssid,
1600                                .grpid  = grpid,
1601                                .resp   = PRI_RESP_DENY,
1602                        },
1603                };
1604
1605                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1606        }
1607}
1608
1609static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1610{
1611        struct arm_smmu_device *smmu = dev;
1612        struct arm_smmu_queue *q = &smmu->priq.q;
1613        struct arm_smmu_ll_queue *llq = &q->llq;
1614        u64 evt[PRIQ_ENT_DWORDS];
1615
1616        do {
1617                while (!queue_remove_raw(q, evt))
1618                        arm_smmu_handle_ppr(smmu, evt);
1619
1620                if (queue_sync_prod_in(q) == -EOVERFLOW)
1621                        dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1622        } while (!queue_empty(llq));
1623
1624        /* Sync our overflow flag, as we believe we're up to speed */
1625        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1626                      Q_IDX(llq, llq->cons);
1627        queue_sync_cons_out(q);
1628        return IRQ_HANDLED;
1629}
1630
1631static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1632
1633static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1634{
1635        u32 gerror, gerrorn, active;
1636        struct arm_smmu_device *smmu = dev;
1637
1638        gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1639        gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1640
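        /*
         * An error is outstanding while its GERROR bit has toggled but the
         * matching GERRORN bit has not yet been written back (see the
         * acknowledgement at the end of this handler), so XORing the two
         * registers yields the set of active errors.
         */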
1641        active = gerror ^ gerrorn;
1642        if (!(active & GERROR_ERR_MASK))
1643                return IRQ_NONE; /* No errors pending */
1644
1645        dev_warn(smmu->dev,
1646                 "unexpected global error reported (0x%08x), this could be serious\n",
1647                 active);
1648
1649        if (active & GERROR_SFM_ERR) {
1650                dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1651                arm_smmu_device_disable(smmu);
1652        }
1653
1654        if (active & GERROR_MSI_GERROR_ABT_ERR)
1655                dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1656
1657        if (active & GERROR_MSI_PRIQ_ABT_ERR)
1658                dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1659
1660        if (active & GERROR_MSI_EVTQ_ABT_ERR)
1661                dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1662
1663        if (active & GERROR_MSI_CMDQ_ABT_ERR)
1664                dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1665
1666        if (active & GERROR_PRIQ_ABT_ERR)
1667                dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1668
1669        if (active & GERROR_EVTQ_ABT_ERR)
1670                dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1671
1672        if (active & GERROR_CMDQ_ERR)
1673                arm_smmu_cmdq_skip_err(smmu);
1674
1675        writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1676        return IRQ_HANDLED;
1677}
1678
1679static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1680{
1681        struct arm_smmu_device *smmu = dev;
1682
1683        arm_smmu_evtq_thread(irq, dev);
1684        if (smmu->features & ARM_SMMU_FEAT_PRI)
1685                arm_smmu_priq_thread(irq, dev);
1686
1687        return IRQ_HANDLED;
1688}
1689
1690static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1691{
1692        arm_smmu_gerror_handler(irq, dev);
1693        return IRQ_WAKE_THREAD;
1694}
1695
1696static void
1697arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1698                        struct arm_smmu_cmdq_ent *cmd)
1699{
1700        size_t log2_span;
1701        size_t span_mask;
1702        /* ATC invalidates are always on 4096-byte pages */
1703        size_t inval_grain_shift = 12;
1704        unsigned long page_start, page_end;
1705
1706        /*
1707         * ATS and PASID:
1708         *
1709         * If substream_valid is clear, the PCIe TLP is sent without a PASID
1710         * prefix. In that case all ATC entries within the address range are
1711         * invalidated, including those that were requested with a PASID! There
1712         * is no way to invalidate only entries without PASID.
1713         *
1714         * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1715         * traffic), translation requests without PASID create ATC entries
1716         * without PASID, which must be invalidated with substream_valid clear.
1717         * This has the unpleasant side-effect of invalidating all PASID-tagged
1718         * ATC entries within the address range.
1719         */
1720        *cmd = (struct arm_smmu_cmdq_ent) {
1721                .opcode                 = CMDQ_OP_ATC_INV,
1722                .substream_valid        = !!ssid,
1723                .atc.ssid               = ssid,
1724        };
1725
1726        if (!size) {
1727                cmd->atc.size = ATC_INV_SIZE_ALL;
1728                return;
1729        }
1730
1731        page_start      = iova >> inval_grain_shift;
1732        page_end        = (iova + size - 1) >> inval_grain_shift;
1733
1734        /*
1735         * In an ATS Invalidate Request, the address must be aligned on the
1736         * range size, which must be a power-of-two multiple of the page size. We
1737         * thus have to choose between grossly over-invalidating the region, or
1738         * splitting the invalidation into multiple commands. For simplicity
1739         * we'll go with the first solution, but should refine it in the future
1740         * if multiple commands are shown to be more efficient.
1741         *
1742         * Find the smallest power of two that covers the range. The most
1743         * significant differing bit between the start and end addresses,
1744         * fls(start ^ end), indicates the required span. For example:
1745         *
1746         * We want to invalidate pages [8; 11]. This is already the ideal range:
1747         *              x = 0b1000 ^ 0b1011 = 0b11
1748         *              span = 1 << fls(x) = 4
1749         *
1750         * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1751         *              x = 0b0111 ^ 0b1010 = 0b1101
1752         *              span = 1 << fls(x) = 16
1753         */
1754        log2_span       = fls_long(page_start ^ page_end);
1755        span_mask       = (1ULL << log2_span) - 1;
1756
1757        page_start      &= ~span_mask;
1758
1759        cmd->atc.addr   = page_start << inval_grain_shift;
1760        cmd->atc.size   = log2_span;
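        /*
         * Continuing the example above: for pages [7; 10] (iova 0x7000,
         * size 0x4000), log2_span = 4, page_start is rounded down to 0 and
         * the command ends up with addr = 0x0 and size = 4, i.e. a 16-page
         * (64KB) invalidation.
         */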
1761}
1762
1763static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1764{
1765        int i;
1766        struct arm_smmu_cmdq_ent cmd;
1767        struct arm_smmu_cmdq_batch cmds = {};
1768
1769        arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1770
1771        for (i = 0; i < master->num_streams; i++) {
1772                cmd.atc.sid = master->streams[i].id;
1773                arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1774        }
1775
1776        return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1777}
1778
1779int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1780                            unsigned long iova, size_t size)
1781{
1782        int i;
1783        unsigned long flags;
1784        struct arm_smmu_cmdq_ent cmd;
1785        struct arm_smmu_master *master;
1786        struct arm_smmu_cmdq_batch cmds;
1787
1788        if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1789                return 0;
1790
1791        /*
1792         * Ensure that we've completed prior invalidation of the main TLBs
1793         * before we read 'nr_ats_masters' in case of a concurrent call to
1794         * arm_smmu_enable_ats():
1795         *
1796         *      // unmap()                      // arm_smmu_enable_ats()
1797         *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1798         *      smp_mb();                       [...]
1799         *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1800         *
1801         * Ensures that we always see the incremented 'nr_ats_masters' count if
1802         * ATS was enabled at the PCI device before completion of the TLBI.
1803         */
1804        smp_mb();
1805        if (!atomic_read(&smmu_domain->nr_ats_masters))
1806                return 0;
1807
1808        arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1809
1810        cmds.num = 0;
1811
1812        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1813        list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1814                if (!master->ats_enabled)
1815                        continue;
1816
1817                for (i = 0; i < master->num_streams; i++) {
1818                        cmd.atc.sid = master->streams[i].id;
1819                        arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1820                }
1821        }
1822        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1823
1824        return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1825}
1826
1827/* IO_PGTABLE API */
1828static void arm_smmu_tlb_inv_context(void *cookie)
1829{
1830        struct arm_smmu_domain *smmu_domain = cookie;
1831        struct arm_smmu_device *smmu = smmu_domain->smmu;
1832        struct arm_smmu_cmdq_ent cmd;
1833
1834        /*
1835         * NOTE: when io-pgtable is in non-strict mode, we may get here with
1836         * PTEs previously cleared by unmaps on the current CPU not yet visible
1837         * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1838         * insertion to guarantee those are observed before the TLBI. Do be
1839         * careful, 007.
1840         */
1841        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1842                arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1843        } else {
1844                cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1845                cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1846                arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1847        }
1848        arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1849}
1850
1851static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1852                                     unsigned long iova, size_t size,
1853                                     size_t granule,
1854                                     struct arm_smmu_domain *smmu_domain)
1855{
1856        struct arm_smmu_device *smmu = smmu_domain->smmu;
1857        unsigned long end = iova + size, num_pages = 0, tg = 0;
1858        size_t inv_range = granule;
1859        struct arm_smmu_cmdq_batch cmds;
1860
1861        if (!size)
1862                return;
1863
1864        if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1865                /* Get the leaf page size */
1866                tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1867
1868                /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1869                cmd->tlbi.tg = (tg - 10) / 2;
1870
1871                /* Determine what level the granule is at */
1872                cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1873
1874                num_pages = size >> tg;
1875        }
1876
1877        cmds.num = 0;
1878
1879        while (iova < end) {
1880                if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1881                        /*
1882                         * On each iteration of the loop, the range is 5 bits
1883                         * worth of the aligned size remaining.
1884                         * The range in pages is:
1885                         *
1886                         * range = (num_pages & (0x1f << __ffs(num_pages)))
1887                         */
1888                        unsigned long scale, num;
1889
1890                        /* Determine the power of 2 multiple number of pages */
1891                        scale = __ffs(num_pages);
1892                        cmd->tlbi.scale = scale;
1893
1894                        /* Determine how many chunks of 2^scale size we have */
1895                        num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1896                        cmd->tlbi.num = num - 1;
1897
1898                        /* range is num * 2^scale * pgsize */
1899                        inv_range = num << (scale + tg);
1900
1901                        /* Clear out the lower order bits for the next iteration */
1902                        num_pages -= num << scale;
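                        /*
                         * Worked example (illustrative, 4KB leaf pages so
                         * tg = 12): for a 6MB range, num_pages = 1536,
                         * scale = __ffs(1536) = 9 and num = 3, so a single
                         * command covers 3 << (9 + 12) = 6MB and num_pages
                         * drops to zero, completing the range in one
                         * iteration.
                         */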
1903                }
1904
1905                cmd->tlbi.addr = iova;
1906                arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1907                iova += inv_range;
1908        }
1909        arm_smmu_cmdq_batch_submit(smmu, &cmds);
1910}
1911
1912static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1913                                          size_t granule, bool leaf,
1914                                          struct arm_smmu_domain *smmu_domain)
1915{
1916        struct arm_smmu_cmdq_ent cmd = {
1917                .tlbi = {
1918                        .leaf   = leaf,
1919                },
1920        };
1921
1922        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1923                cmd.opcode      = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1924                                  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1925                cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1926        } else {
1927                cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1928                cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1929        }
1930        __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1931
1932        /*
1933         * Unfortunately, this can't be leaf-only since we may have
1934         * zapped an entire table.
1935         */
1936        arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1937}
1938
1939void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1940                                 size_t granule, bool leaf,
1941                                 struct arm_smmu_domain *smmu_domain)
1942{
1943        struct arm_smmu_cmdq_ent cmd = {
1944                .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1945                          CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1946                .tlbi = {
1947                        .asid   = asid,
1948                        .leaf   = leaf,
1949                },
1950        };
1951
1952        __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1953}
1954
1955static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1956                                         unsigned long iova, size_t granule,
1957                                         void *cookie)
1958{
1959        struct arm_smmu_domain *smmu_domain = cookie;
1960        struct iommu_domain *domain = &smmu_domain->domain;
1961
1962        iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1963}
1964
1965static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1966                                  size_t granule, void *cookie)
1967{
1968        arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1969}
1970
1971static const struct iommu_flush_ops arm_smmu_flush_ops = {
1972        .tlb_flush_all  = arm_smmu_tlb_inv_context,
1973        .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1974        .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1975};
1976
1977/* IOMMU API */
1978static bool arm_smmu_capable(enum iommu_cap cap)
1979{
1980        switch (cap) {
1981        case IOMMU_CAP_CACHE_COHERENCY:
1982                return true;
1983        case IOMMU_CAP_NOEXEC:
1984                return true;
1985        default:
1986                return false;
1987        }
1988}
1989
1990static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1991{
1992        struct arm_smmu_domain *smmu_domain;
1993
1994        if (type != IOMMU_DOMAIN_UNMANAGED &&
1995            type != IOMMU_DOMAIN_DMA &&
1996            type != IOMMU_DOMAIN_DMA_FQ &&
1997            type != IOMMU_DOMAIN_IDENTITY)
1998                return NULL;
1999
2000        /*
2001         * Allocate the domain and initialise some of its data structures.
2002         * We can't really do anything meaningful until we've added a
2003         * master.
2004         */
2005        smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2006        if (!smmu_domain)
2007                return NULL;
2008
2009        mutex_init(&smmu_domain->init_mutex);
2010        INIT_LIST_HEAD(&smmu_domain->devices);
2011        spin_lock_init(&smmu_domain->devices_lock);
2012        INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2013
2014        return &smmu_domain->domain;
2015}
2016
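/*
 * Lock-free bitmap allocator: if another CPU races us to the same bit,
 * test_and_set_bit() fails and we retry with the next free index.
 */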
2017static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2018{
2019        int idx, size = 1 << span;
2020
2021        do {
2022                idx = find_first_zero_bit(map, size);
2023                if (idx == size)
2024                        return -ENOSPC;
2025        } while (test_and_set_bit(idx, map));
2026
2027        return idx;
2028}
2029
2030static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2031{
2032        clear_bit(idx, map);
2033}
2034
2035static void arm_smmu_domain_free(struct iommu_domain *domain)
2036{
2037        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2038        struct arm_smmu_device *smmu = smmu_domain->smmu;
2039
2040        free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2041
2042        /* Free the CD and ASID, if we allocated them */
2043        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2044                struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2045
2046                /* Prevent SVA from touching the CD while we're freeing it */
2047                mutex_lock(&arm_smmu_asid_lock);
2048                if (cfg->cdcfg.cdtab)
2049                        arm_smmu_free_cd_tables(smmu_domain);
2050                arm_smmu_free_asid(&cfg->cd);
2051                mutex_unlock(&arm_smmu_asid_lock);
2052        } else {
2053                struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2054                if (cfg->vmid)
2055                        arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2056        }
2057
2058        kfree(smmu_domain);
2059}
2060
2061static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2062                                       struct arm_smmu_master *master,
2063                                       struct io_pgtable_cfg *pgtbl_cfg)
2064{
2065        int ret;
2066        u32 asid;
2067        struct arm_smmu_device *smmu = smmu_domain->smmu;
2068        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2069        typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2070
2071        refcount_set(&cfg->cd.refs, 1);
2072
2073        /* Prevent SVA from modifying the ASID until it is written to the CD */
2074        mutex_lock(&arm_smmu_asid_lock);
2075        ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2076                       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2077        if (ret)
2078                goto out_unlock;
2079
2080        cfg->s1cdmax = master->ssid_bits;
2081
2082        smmu_domain->stall_enabled = master->stall_enabled;
2083
2084        ret = arm_smmu_alloc_cd_tables(smmu_domain);
2085        if (ret)
2086                goto out_free_asid;
2087
2088        cfg->cd.asid    = (u16)asid;
2089        cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2090        cfg->cd.tcr     = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2091                          FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2092                          FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2093                          FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2094                          FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2095                          FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2096                          CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2097        cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2098
2099        /*
2100         * Note that this will end up calling arm_smmu_sync_cd() before
2101         * the master has been added to the devices list for this domain.
2102         * This isn't an issue because the STE hasn't been installed yet.
2103         */
2104        ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2105        if (ret)
2106                goto out_free_cd_tables;
2107
2108        mutex_unlock(&arm_smmu_asid_lock);
2109        return 0;
2110
2111out_free_cd_tables:
2112        arm_smmu_free_cd_tables(smmu_domain);
2113out_free_asid:
2114        arm_smmu_free_asid(&cfg->cd);
2115out_unlock:
2116        mutex_unlock(&arm_smmu_asid_lock);
2117        return ret;
2118}
2119
2120static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2121                                       struct arm_smmu_master *master,
2122                                       struct io_pgtable_cfg *pgtbl_cfg)
2123{
2124        int vmid;
2125        struct arm_smmu_device *smmu = smmu_domain->smmu;
2126        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2127        typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2128
2129        vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2130        if (vmid < 0)
2131                return vmid;
2132
2133        vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2134        cfg->vmid       = (u16)vmid;
2135        cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2136        cfg->vtcr       = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2137                          FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2138                          FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2139                          FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2140                          FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2141                          FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2142                          FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2143        return 0;
2144}
2145
2146static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2147                                    struct arm_smmu_master *master)
2148{
2149        int ret;
2150        unsigned long ias, oas;
2151        enum io_pgtable_fmt fmt;
2152        struct io_pgtable_cfg pgtbl_cfg;
2153        struct io_pgtable_ops *pgtbl_ops;
2154        int (*finalise_stage_fn)(struct arm_smmu_domain *,
2155                                 struct arm_smmu_master *,
2156                                 struct io_pgtable_cfg *);
2157        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2158        struct arm_smmu_device *smmu = smmu_domain->smmu;
2159
2160        if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2161                smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2162                return 0;
2163        }
2164
2165        /* Restrict the stage to what we can actually support */
2166        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2167                smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2168        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2169                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2170
2171        switch (smmu_domain->stage) {
2172        case ARM_SMMU_DOMAIN_S1:
2173                ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2174                ias = min_t(unsigned long, ias, VA_BITS);
2175                oas = smmu->ias;
2176                fmt = ARM_64_LPAE_S1;
2177                finalise_stage_fn = arm_smmu_domain_finalise_s1;
2178                break;
2179        case ARM_SMMU_DOMAIN_NESTED:
2180        case ARM_SMMU_DOMAIN_S2:
2181                ias = smmu->ias;
2182                oas = smmu->oas;
2183                fmt = ARM_64_LPAE_S2;
2184                finalise_stage_fn = arm_smmu_domain_finalise_s2;
2185                break;
2186        default:
2187                return -EINVAL;
2188        }
2189
2190        pgtbl_cfg = (struct io_pgtable_cfg) {
2191                .pgsize_bitmap  = smmu->pgsize_bitmap,
2192                .ias            = ias,
2193                .oas            = oas,
2194                .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2195                .tlb            = &arm_smmu_flush_ops,
2196                .iommu_dev      = smmu->dev,
2197        };
2198
2199        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2200        if (!pgtbl_ops)
2201                return -ENOMEM;
2202
2203        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2204        domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2205        domain->geometry.force_aperture = true;
2206
2207        ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2208        if (ret < 0) {
2209                free_io_pgtable_ops(pgtbl_ops);
2210                return ret;
2211        }
2212
2213        smmu_domain->pgtbl_ops = pgtbl_ops;
2214        return 0;
2215}
2216
2217static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2218{
2219        __le64 *step;
2220        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2221
2222        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2223                struct arm_smmu_strtab_l1_desc *l1_desc;
2224                int idx;
2225
2226                /* Two-level walk */
2227                idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2228                l1_desc = &cfg->l1_desc[idx];
2229                idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2230                step = &l1_desc->l2ptr[idx];
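                /*
                 * e.g. assuming the driver's 8-bit STRTAB_SPLIT: SID 0x1234
                 * walks to L1 descriptor 0x12 and then to STE 0x34 within
                 * its level-2 table.
                 */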
2231        } else {
2232                /* Simple linear lookup */
2233                step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2234        }
2235
2236        return step;
2237}
2238
2239static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2240{
2241        int i, j;
2242        struct arm_smmu_device *smmu = master->smmu;
2243
2244        for (i = 0; i < master->num_streams; ++i) {
2245                u32 sid = master->streams[i].id;
2246                __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2247
2248                /* Bridged PCI devices may end up with duplicated IDs */
2249                for (j = 0; j < i; j++)
2250                        if (master->streams[j].id == sid)
2251                                break;
2252                if (j < i)
2253                        continue;
2254
2255                arm_smmu_write_strtab_ent(master, sid, step);
2256        }
2257}
2258
2259static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2260{
2261        struct device *dev = master->dev;
2262        struct arm_smmu_device *smmu = master->smmu;
2263        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2264
2265        if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2266                return false;
2267
2268        if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2269                return false;
2270
2271        return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2272}
2273
2274static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2275{
2276        size_t stu;
2277        struct pci_dev *pdev;
2278        struct arm_smmu_device *smmu = master->smmu;
2279        struct arm_smmu_domain *smmu_domain = master->domain;
2280
2281        /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2282        if (!master->ats_enabled)
2283                return;
2284
2285        /* Smallest Translation Unit: log2 of the smallest supported granule */
2286        stu = __ffs(smmu->pgsize_bitmap);
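        /* e.g. a pgsize_bitmap of SZ_4K | SZ_2M | SZ_1G gives stu = 12 (4KB) */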
2287        pdev = to_pci_dev(master->dev);
2288
2289        atomic_inc(&smmu_domain->nr_ats_masters);
2290        arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2291        if (pci_enable_ats(pdev, stu))
2292                dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2293}
2294
2295static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2296{
2297        struct arm_smmu_domain *smmu_domain = master->domain;
2298
2299        if (!master->ats_enabled)
2300                return;
2301
2302        pci_disable_ats(to_pci_dev(master->dev));
2303        /*
2304         * Ensure ATS is disabled at the endpoint before we issue the
2305         * ATC invalidation via the SMMU.
2306         */
2307        wmb();
2308        arm_smmu_atc_inv_master(master);
2309        atomic_dec(&smmu_domain->nr_ats_masters);
2310}
2311
2312static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2313{
2314        int ret;
2315        int features;
2316        int num_pasids;
2317        struct pci_dev *pdev;
2318
2319        if (!dev_is_pci(master->dev))
2320                return -ENODEV;
2321
2322        pdev = to_pci_dev(master->dev);
2323
2324        features = pci_pasid_features(pdev);
2325        if (features < 0)
2326                return features;
2327
2328        num_pasids = pci_max_pasids(pdev);
2329        if (num_pasids <= 0)
2330                return num_pasids;
2331
2332        ret = pci_enable_pasid(pdev, features);
2333        if (ret) {
2334                dev_err(&pdev->dev, "Failed to enable PASID\n");
2335                return ret;
2336        }
2337
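        /*
         * e.g. a device advertising 2^16 PASIDs gets ilog2(num_pasids) = 16,
         * so at most 16 SSID bits, capped by what the SMMU itself supports.
         */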
2338        master->ssid_bits = min_t(u8, ilog2(num_pasids),
2339                                  master->smmu->ssid_bits);
2340        return 0;
2341}
2342
2343static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2344{
2345        struct pci_dev *pdev;
2346
2347        if (!dev_is_pci(master->dev))
2348                return;
2349
2350        pdev = to_pci_dev(master->dev);
2351
2352        if (!pdev->pasid_enabled)
2353                return;
2354
2355        master->ssid_bits = 0;
2356        pci_disable_pasid(pdev);
2357}
2358
2359static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2360{
2361        unsigned long flags;
2362        struct arm_smmu_domain *smmu_domain = master->domain;
2363
2364        if (!smmu_domain)
2365                return;
2366
2367        arm_smmu_disable_ats(master);
2368
2369        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2370        list_del(&master->domain_head);
2371        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2372
2373        master->domain = NULL;
2374        master->ats_enabled = false;
2375        arm_smmu_install_ste_for_dev(master);
2376}
2377
2378static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2379{
2380        int ret = 0;
2381        unsigned long flags;
2382        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2383        struct arm_smmu_device *smmu;
2384        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2385        struct arm_smmu_master *master;
2386
2387        if (!fwspec)
2388                return -ENOENT;
2389
2390        master = dev_iommu_priv_get(dev);
2391        smmu = master->smmu;
2392
2393        /*
2394         * Checking that SVA is disabled ensures that this device isn't bound to
2395         * any mm, and can be safely detached from its old domain. Bonds cannot
2396         * be removed concurrently since we're holding the group mutex.
2397         */
2398        if (arm_smmu_master_sva_enabled(master)) {
2399                dev_err(dev, "cannot attach - SVA enabled\n");
2400                return -EBUSY;
2401        }
2402
2403        arm_smmu_detach_dev(master);
2404
2405        mutex_lock(&smmu_domain->init_mutex);
2406
2407        if (!smmu_domain->smmu) {
2408                smmu_domain->smmu = smmu;
2409                ret = arm_smmu_domain_finalise(domain, master);
2410                if (ret) {
2411                        smmu_domain->smmu = NULL;
2412                        goto out_unlock;
2413                }
2414        } else if (smmu_domain->smmu != smmu) {
2415                dev_err(dev,
2416                        "cannot attach to SMMU %s (upstream of %s)\n",
2417                        dev_name(smmu_domain->smmu->dev),
2418                        dev_name(smmu->dev));
2419                ret = -ENXIO;
2420                goto out_unlock;
2421        } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2422                   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2423                dev_err(dev,
2424                        "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2425                        smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2426                ret = -EINVAL;
2427                goto out_unlock;
2428        } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2429                   smmu_domain->stall_enabled != master->stall_enabled) {
2430                dev_err(dev, "cannot attach to stall-%s domain\n",
2431                        smmu_domain->stall_enabled ? "enabled" : "disabled");
2432                ret = -EINVAL;
2433                goto out_unlock;
2434        }
2435
2436        master->domain = smmu_domain;
2437
2438        if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2439                master->ats_enabled = arm_smmu_ats_supported(master);
2440
2441        arm_smmu_install_ste_for_dev(master);
2442
2443        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2444        list_add(&master->domain_head, &smmu_domain->devices);
2445        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2446
2447        arm_smmu_enable_ats(master);
2448
2449out_unlock:
2450        mutex_unlock(&smmu_domain->init_mutex);
2451        return ret;
2452}
2453
2454static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2455                              phys_addr_t paddr, size_t pgsize, size_t pgcount,
2456                              int prot, gfp_t gfp, size_t *mapped)
2457{
2458        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2459
2460        if (!ops)
2461                return -ENODEV;
2462
2463        return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2464}
2465
2466static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2467                                   size_t pgsize, size_t pgcount,
2468                                   struct iommu_iotlb_gather *gather)
2469{
2470        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2471        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2472
2473        if (!ops)
2474                return 0;
2475
2476        return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2477}
2478
2479static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2480{
2481        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2482
2483        if (smmu_domain->smmu)
2484                arm_smmu_tlb_inv_context(smmu_domain);
2485}
2486
2487static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2488                                struct iommu_iotlb_gather *gather)
2489{
2490        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2491
2492        if (!gather->pgsize)
2493                return;
2494
2495        arm_smmu_tlb_inv_range_domain(gather->start,
2496                                      gather->end - gather->start + 1,
2497                                      gather->pgsize, true, smmu_domain);
2498}
2499
2500static phys_addr_t
2501arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2502{
2503        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2504
2505        if (!ops)
2506                return 0;
2507
2508        return ops->iova_to_phys(ops, iova);
2509}
2510
2511static struct platform_driver arm_smmu_driver;
2512
2513static
2514struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2515{
2516        struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2517                                                          fwnode);
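        /*
         * driver_find_device_by_fwnode() takes a reference on the device;
         * we only need its drvdata here, so drop the reference immediately.
         */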
2518        put_device(dev);
2519        return dev ? dev_get_drvdata(dev) : NULL;
2520}
2521
2522static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2523{
2524        unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2525
2526        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2527                limit *= 1UL << STRTAB_SPLIT;
2528
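        /*
         * e.g. a two-level table with an 8-bit split and 64 L1 entries
         * accepts SIDs 0..16383.
         */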
2529        return sid < limit;
2530}
2531
2532static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2533                                  struct arm_smmu_master *master)
2534{
2535        int i;
2536        int ret = 0;
2537        struct arm_smmu_stream *new_stream, *cur_stream;
2538        struct rb_node **new_node, *parent_node = NULL;
2539        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2540
2541        master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2542                                  GFP_KERNEL);
2543        if (!master->streams)
2544                return -ENOMEM;
2545        master->num_streams = fwspec->num_ids;
2546
2547        mutex_lock(&smmu->streams_mutex);
2548        for (i = 0; i < fwspec->num_ids; i++) {
2549                u32 sid = fwspec->ids[i];
2550
2551                new_stream = &master->streams[i];
2552                new_stream->id = sid;
2553                new_stream->master = master;
2554
2555                /*
2556                 * Check the SIDs are in range of the SMMU and our stream table
2557                 */
2558                if (!arm_smmu_sid_in_range(smmu, sid)) {
2559                        ret = -ERANGE;
2560                        break;
2561                }
2562
2563                /* Ensure l2 strtab is initialised */
2564                if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2565                        ret = arm_smmu_init_l2_strtab(smmu, sid);
2566                        if (ret)
2567                                break;
2568                }
2569
2570                /* Insert into SID tree */
2571                new_node = &(smmu->streams.rb_node);
2572                while (*new_node) {
2573                        cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2574                                              node);
2575                        parent_node = *new_node;
2576                        if (cur_stream->id > new_stream->id) {
2577                                new_node = &((*new_node)->rb_left);
2578                        } else if (cur_stream->id < new_stream->id) {
2579                                new_node = &((*new_node)->rb_right);
2580                        } else {
2581                                dev_warn(master->dev,
2582                                         "stream %u already in tree\n",
2583                                         cur_stream->id);
2584                                ret = -EINVAL;
2585                                break;
2586                        }
2587                }
2588                if (ret)
2589                        break;
2590
2591                rb_link_node(&new_stream->node, parent_node, new_node);
2592                rb_insert_color(&new_stream->node, &smmu->streams);
2593        }
2594
2595        if (ret) {
2596                for (i--; i >= 0; i--)
2597                        rb_erase(&master->streams[i].node, &smmu->streams);
2598                kfree(master->streams);
2599        }
2600        mutex_unlock(&smmu->streams_mutex);
2601
2602        return ret;
2603}
2604
2605static void arm_smmu_remove_master(struct arm_smmu_master *master)
2606{
2607        int i;
2608        struct arm_smmu_device *smmu = master->smmu;
2609        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2610
2611        if (!smmu || !master->streams)
2612                return;
2613
2614        mutex_lock(&smmu->streams_mutex);
2615        for (i = 0; i < fwspec->num_ids; i++)
2616                rb_erase(&master->streams[i].node, &smmu->streams);
2617        mutex_unlock(&smmu->streams_mutex);
2618
2619        kfree(master->streams);
2620}
2621
2622static struct iommu_ops arm_smmu_ops;
2623
2624static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2625{
2626        int ret;
2627        struct arm_smmu_device *smmu;
2628        struct arm_smmu_master *master;
2629        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2630
2631        if (!fwspec || fwspec->ops != &arm_smmu_ops)
2632                return ERR_PTR(-ENODEV);
2633
2634        if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2635                return ERR_PTR(-EBUSY);
2636
2637        smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2638        if (!smmu)
2639                return ERR_PTR(-ENODEV);
2640
2641        master = kzalloc(sizeof(*master), GFP_KERNEL);
2642        if (!master)
2643                return ERR_PTR(-ENOMEM);
2644
2645        master->dev = dev;
2646        master->smmu = smmu;
2647        INIT_LIST_HEAD(&master->bonds);
2648        dev_iommu_priv_set(dev, master);
2649
2650        ret = arm_smmu_insert_master(smmu, master);
2651        if (ret)
2652                goto err_free_master;
2653
2654        device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2655        master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2656
2657        /*
2658         * Note that PASID must be enabled before, and disabled after ATS:
2659         * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2660         *
2661         *   Behavior is undefined if this bit is Set and the value of the PASID
2662         *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2663         *   are changed.
2664         */
2665        arm_smmu_enable_pasid(master);
2666
2667        if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2668                master->ssid_bits = min_t(u8, master->ssid_bits,
2669                                          CTXDESC_LINEAR_CDMAX);
2670
2671        if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2672             device_property_read_bool(dev, "dma-can-stall")) ||
2673            smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2674                master->stall_enabled = true;
2675
2676        return &smmu->iommu;
2677
2678err_free_master:
2679        kfree(master);
2680        dev_iommu_priv_set(dev, NULL);
2681        return ERR_PTR(ret);
2682}
2683
2684static void arm_smmu_release_device(struct device *dev)
2685{
2686        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2687        struct arm_smmu_master *master;
2688
2689        if (!fwspec || fwspec->ops != &arm_smmu_ops)
2690                return;
2691
2692        master = dev_iommu_priv_get(dev);
2693        if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2694                iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2695        arm_smmu_detach_dev(master);
2696        arm_smmu_disable_pasid(master);
2697        arm_smmu_remove_master(master);
2698        kfree(master);
2699        iommu_fwspec_free(dev);
2700}
2701
2702static struct iommu_group *arm_smmu_device_group(struct device *dev)
2703{
2704        struct iommu_group *group;
2705
2706        /*
2707         * We don't support devices sharing stream IDs other than PCI RID
2708         * aliases, since the necessary ID-to-device lookup becomes rather
2709         * impractical given a potential sparse 32-bit stream ID space.
2710         */
2711        if (dev_is_pci(dev))
2712                group = pci_device_group(dev);
2713        else
2714                group = generic_device_group(dev);
2715
2716        return group;
2717}
2718
2719static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2720{
2721        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2722        int ret = 0;
2723
2724        mutex_lock(&smmu_domain->init_mutex);
2725        if (smmu_domain->smmu)
2726                ret = -EPERM;
2727        else
2728                smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2729        mutex_unlock(&smmu_domain->init_mutex);
2730
2731        return ret;
2732}
2733
2734static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2735{
2736        return iommu_fwspec_add_ids(dev, args->args, 1);
2737}
2738
2739static void arm_smmu_get_resv_regions(struct device *dev,
2740                                      struct list_head *head)
2741{
2742        struct iommu_resv_region *region;
2743        int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2744
2745        region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2746                                         prot, IOMMU_RESV_SW_MSI);
2747        if (!region)
2748                return;
2749
2750        list_add_tail(&region->list, head);
2751
2752        iommu_dma_get_resv_regions(dev, head);
2753}
2754
2755static bool arm_smmu_dev_has_feature(struct device *dev,
2756                                     enum iommu_dev_features feat)
2757{
2758        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2759
2760        if (!master)
2761                return false;
2762
2763        switch (feat) {
2764        case IOMMU_DEV_FEAT_IOPF:
2765                return arm_smmu_master_iopf_supported(master);
2766        case IOMMU_DEV_FEAT_SVA:
2767                return arm_smmu_master_sva_supported(master);
2768        default:
2769                return false;
2770        }
2771}
2772
2773static bool arm_smmu_dev_feature_enabled(struct device *dev,
2774                                         enum iommu_dev_features feat)
2775{
2776        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2777
2778        if (!master)
2779                return false;
2780
2781        switch (feat) {
2782        case IOMMU_DEV_FEAT_IOPF:
2783                return master->iopf_enabled;
2784        case IOMMU_DEV_FEAT_SVA:
2785                return arm_smmu_master_sva_enabled(master);
2786        default:
2787                return false;
2788        }
2789}
2790
2791static int arm_smmu_dev_enable_feature(struct device *dev,
2792                                       enum iommu_dev_features feat)
2793{
2794        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2795
2796        if (!arm_smmu_dev_has_feature(dev, feat))
2797                return -ENODEV;
2798
2799        if (arm_smmu_dev_feature_enabled(dev, feat))
2800                return -EBUSY;
2801
2802        switch (feat) {
2803        case IOMMU_DEV_FEAT_IOPF:
2804                master->iopf_enabled = true;
2805                return 0;
2806        case IOMMU_DEV_FEAT_SVA:
2807                return arm_smmu_master_enable_sva(master);
2808        default:
2809                return -EINVAL;
2810        }
2811}
2812
2813static int arm_smmu_dev_disable_feature(struct device *dev,
2814                                        enum iommu_dev_features feat)
2815{
2816        struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2817
2818        if (!arm_smmu_dev_feature_enabled(dev, feat))
2819                return -EINVAL;
2820
2821        switch (feat) {
2822        case IOMMU_DEV_FEAT_IOPF:
2823                if (master->sva_enabled)
2824                        return -EBUSY;
2825                master->iopf_enabled = false;
2826                return 0;
2827        case IOMMU_DEV_FEAT_SVA:
2828                return arm_smmu_master_disable_sva(master);
2829        default:
2830                return -EINVAL;
2831        }
2832}
2833
2834static struct iommu_ops arm_smmu_ops = {
2835        .capable                = arm_smmu_capable,
2836        .domain_alloc           = arm_smmu_domain_alloc,
2837        .domain_free            = arm_smmu_domain_free,
2838        .attach_dev             = arm_smmu_attach_dev,
2839        .map_pages              = arm_smmu_map_pages,
2840        .unmap_pages            = arm_smmu_unmap_pages,
2841        .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2842        .iotlb_sync             = arm_smmu_iotlb_sync,
2843        .iova_to_phys           = arm_smmu_iova_to_phys,
2844        .probe_device           = arm_smmu_probe_device,
2845        .release_device         = arm_smmu_release_device,
2846        .device_group           = arm_smmu_device_group,
2847        .enable_nesting         = arm_smmu_enable_nesting,
2848        .of_xlate               = arm_smmu_of_xlate,
2849        .get_resv_regions       = arm_smmu_get_resv_regions,
2850        .put_resv_regions       = generic_iommu_put_resv_regions,
2851        .dev_has_feat           = arm_smmu_dev_has_feature,
2852        .dev_feat_enabled       = arm_smmu_dev_feature_enabled,
2853        .dev_enable_feat        = arm_smmu_dev_enable_feature,
2854        .dev_disable_feat       = arm_smmu_dev_disable_feature,
2855        .sva_bind               = arm_smmu_sva_bind,
2856        .sva_unbind             = arm_smmu_sva_unbind,
2857        .sva_get_pasid          = arm_smmu_sva_get_pasid,
2858        .page_response          = arm_smmu_page_response,
2859        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2860        .owner                  = THIS_MODULE,
2861};
2862
2863/* Probing and initialisation functions */
2864static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2865                                   struct arm_smmu_queue *q,
2866                                   void __iomem *page,
2867                                   unsigned long prod_off,
2868                                   unsigned long cons_off,
2869                                   size_t dwords, const char *name)
2870{
2871        size_t qsz;
2872
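        /*
         * Allocate the queue at the size advertised by the hardware, halving
         * it on failure. Once the size drops below one page there is nothing
         * to gain from shrinking further, so give up.
         */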
2873        do {
2874                qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2875                q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2876                                              GFP_KERNEL);
2877                if (q->base || qsz < PAGE_SIZE)
2878                        break;
2879
2880                q->llq.max_n_shift--;
2881        } while (1);
2882
2883        if (!q->base) {
2884                dev_err(smmu->dev,
2885                        "failed to allocate queue (0x%zx bytes) for %s\n",
2886                        qsz, name);
2887                return -ENOMEM;
2888        }
2889
2890        if (!WARN_ON(q->base_dma & (qsz - 1))) {
2891                dev_info(smmu->dev, "allocated %u entries for %s\n",
2892                         1 << q->llq.max_n_shift, name);
2893        }
2894
2895        q->prod_reg     = page + prod_off;
2896        q->cons_reg     = page + cons_off;
2897        q->ent_dwords   = dwords;
2898
2899        q->q_base  = Q_BASE_RWA;
2900        q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2901        q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2902
2903        q->llq.prod = q->llq.cons = 0;
2904        return 0;
2905}
2906
2907static void arm_smmu_cmdq_free_bitmap(void *data)
2908{
2909        unsigned long *bitmap = data;
2910        bitmap_free(bitmap);
2911}
2912
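/*
 * Initialise the software state behind the lock-free command queue: the
 * owner/lock atomics and a bitmap holding one "valid" bit per queue entry,
 * updated by producers as they publish their commands.
 */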
2913static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2914{
2915        int ret = 0;
2916        struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2917        unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2918        atomic_long_t *bitmap;
2919
2920        atomic_set(&cmdq->owner_prod, 0);
2921        atomic_set(&cmdq->lock, 0);
2922
2923        bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2924        if (!bitmap) {
2925                dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2926                ret = -ENOMEM;
2927        } else {
2928                cmdq->valid_map = bitmap;
2929                devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2930        }
2931
2932        return ret;
2933}
2934
2935static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2936{
2937        int ret;
2938
2939        /* cmdq */
2940        ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2941                                      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2942                                      CMDQ_ENT_DWORDS, "cmdq");
2943        if (ret)
2944                return ret;
2945
2946        ret = arm_smmu_cmdq_init(smmu);
2947        if (ret)
2948                return ret;
2949
2950        /* evtq */
2951        ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2952                                      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2953                                      EVTQ_ENT_DWORDS, "evtq");
2954        if (ret)
2955                return ret;
2956
2957        if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2958            (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2959                smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2960                if (!smmu->evtq.iopf)
2961                        return -ENOMEM;
2962        }
2963
2964        /* priq */
2965        if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2966                return 0;
2967
2968        return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2969                                       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2970                                       PRIQ_ENT_DWORDS, "priq");
2971}
2972
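/*
 * Allocate the shadow array of L1 descriptors and write each one out as
 * invalid (no L2 pointer); second-level tables are only allocated later,
 * when masters with matching StreamIDs are added.
 */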
2973static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2974{
2975        unsigned int i;
2976        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2977        size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2978        void *strtab = smmu->strtab_cfg.strtab;
2979
2980        cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2981        if (!cfg->l1_desc)
2982                return -ENOMEM;
2983
2984        for (i = 0; i < cfg->num_l1_ents; ++i) {
2985                arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2986                strtab += STRTAB_L1_DESC_DWORDS << 3;
2987        }
2988
2989        return 0;
2990}
2991
2992static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2993{
2994        void *strtab;
2995        u64 reg;
2996        u32 size, l1size;
2997        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2998
2999        /*
             * If everything fits within a single L2 table, one L1 descriptor
             * is all we need. Otherwise, calculate the L1 size, capped to the
             * SIDSIZE.
             */
3000        size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3001        size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3002        cfg->num_l1_ents = 1 << size;
3003
3004        size += STRTAB_SPLIT;
3005        if (size < smmu->sid_bits)
3006                dev_warn(smmu->dev,
3007                         "2-level strtab only covers %u/%u bits of SID\n",
3008                         size, smmu->sid_bits);
3009
3010        l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3011        strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3012                                     GFP_KERNEL);
3013        if (!strtab) {
3014                dev_err(smmu->dev,
3015                        "failed to allocate l1 stream table (%u bytes)\n",
3016                        l1size);
3017                return -ENOMEM;
3018        }
3019        cfg->strtab = strtab;
3020
3021        /* Configure strtab_base_cfg for 2 levels */
3022        reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3023        reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3024        reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3025        cfg->strtab_base_cfg = reg;
3026
3027        return arm_smmu_init_l1_strtab(smmu);
3028}
3029
3030static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3031{
3032        void *strtab;
3033        u64 reg;
3034        u32 size;
3035        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3036
3037        size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3038        strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3039                                     GFP_KERNEL);
3040        if (!strtab) {
3041                dev_err(smmu->dev,
3042                        "failed to allocate linear stream table (%u bytes)\n",
3043                        size);
3044                return -ENOMEM;
3045        }
3046        cfg->strtab = strtab;
3047        cfg->num_l1_ents = 1 << smmu->sid_bits;
3048
3049        /* Configure strtab_base_cfg for a linear table covering all SIDs */
3050        reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3051        reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3052        cfg->strtab_base_cfg = reg;
3053
3054        arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3055        return 0;
3056}
3057
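/*
 * Choose a 2-level or linear stream table based on the probed features,
 * record the base address/attributes to be programmed into STRTAB_BASE, and
 * reserve VMID 0 for stage-2 bypass STEs.
 */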
3058static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3059{
3060        u64 reg;
3061        int ret;
3062
3063        if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3064                ret = arm_smmu_init_strtab_2lvl(smmu);
3065        else
3066                ret = arm_smmu_init_strtab_linear(smmu);
3067
3068        if (ret)
3069                return ret;
3070
3071        /* Set the strtab base address */
3072        reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3073        reg |= STRTAB_BASE_RA;
3074        smmu->strtab_cfg.strtab_base = reg;
3075
3076        /* Allocate the first VMID for stage-2 bypass STEs */
3077        set_bit(0, smmu->vmid_map);
3078        return 0;
3079}
3080
3081static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3082{
3083        int ret;
3084
3085        mutex_init(&smmu->streams_mutex);
3086        smmu->streams = RB_ROOT;
3087
3088        ret = arm_smmu_init_queues(smmu);
3089        if (ret)
3090                return ret;
3091
3092        return arm_smmu_init_strtab(smmu);
3093}
3094
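/*
 * Write one of the "value plus acknowledgement" register pairs (e.g.
 * CR0/CR0ACK or IRQ_CTRL/IRQ_CTRLACK) and poll until the ack register
 * reflects the new value or the poll times out.
 */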
3095static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3096                                   unsigned int reg_off, unsigned int ack_off)
3097{
3098        u32 reg;
3099
3100        writel_relaxed(val, smmu->base + reg_off);
3101        return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3102                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3103}
3104
3105/* GBPA is "special": poll the UPDATE bit clear before and after writing */
3106static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3107{
3108        int ret;
3109        u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3110
3111        ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3112                                         1, ARM_SMMU_POLL_TIMEOUT_US);
3113        if (ret)
3114                return ret;
3115
3116        reg &= ~clr;
3117        reg |= set;
3118        writel_relaxed(reg | GBPA_UPDATE, gbpa);
3119        ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3120                                         1, ARM_SMMU_POLL_TIMEOUT_US);
3121
3122        if (ret)
3123                dev_err(smmu->dev, "GBPA not responding to update\n");
3124        return ret;
3125}
3126
3127static void arm_smmu_free_msis(void *data)
3128{
3129        struct device *dev = data;
3130        platform_msi_domain_free_irqs(dev);
3131}
3132
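/*
 * platform-MSI write_msg callback: route the composed MSI to the SMMU's own
 * doorbell by programming the address, data and memory attributes of the
 * IRQ_CFG register triplet selected by the MSI index.
 */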
3133static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3134{
3135        phys_addr_t doorbell;
3136        struct device *dev = msi_desc_to_dev(desc);
3137        struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3138        phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3139
3140        doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3141        doorbell &= MSI_CFG0_ADDR_MASK;
3142
3143        writeq_relaxed(doorbell, smmu->base + cfg[0]);
3144        writel_relaxed(msg->data, smmu->base + cfg[1]);
3145        writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3146}
3147
3148static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3149{
3150        struct msi_desc *desc;
3151        int ret, nvec = ARM_SMMU_MAX_MSIS;
3152        struct device *dev = smmu->dev;
3153
3154        /* Clear the MSI address regs */
3155        writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3156        writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3157
3158        if (smmu->features & ARM_SMMU_FEAT_PRI)
3159                writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3160        else
3161                nvec--;
3162
3163        if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3164                return;
3165
3166        if (!dev->msi_domain) {
3167                dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3168                return;
3169        }
3170
3171        /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3172        ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3173        if (ret) {
3174                dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3175                return;
3176        }
3177
3178        for_each_msi_entry(desc, dev) {
3179                switch (desc->platform.msi_index) {
3180                case EVTQ_MSI_INDEX:
3181                        smmu->evtq.q.irq = desc->irq;
3182                        break;
3183                case GERROR_MSI_INDEX:
3184                        smmu->gerr_irq = desc->irq;
3185                        break;
3186                case PRIQ_MSI_INDEX:
3187                        smmu->priq.q.irq = desc->irq;
3188                        break;
3189                default:        /* Unknown */
3190                        continue;
3191                }
3192        }
3193
3194        /* Add callback to free MSIs on teardown */
3195        devm_add_action(dev, arm_smmu_free_msis, dev);
3196}
3197
3198static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3199{
3200        int irq, ret;
3201
3202        arm_smmu_setup_msis(smmu);
3203
3204        /* Request interrupt lines */
3205        irq = smmu->evtq.q.irq;
3206        if (irq) {
3207                ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3208                                                arm_smmu_evtq_thread,
3209                                                IRQF_ONESHOT,
3210                                                "arm-smmu-v3-evtq", smmu);
3211                if (ret < 0)
3212                        dev_warn(smmu->dev, "failed to enable evtq irq\n");
3213        } else {
3214                dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3215        }
3216
3217        irq = smmu->gerr_irq;
3218        if (irq) {
3219                ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3220                                       0, "arm-smmu-v3-gerror", smmu);
3221                if (ret < 0)
3222                        dev_warn(smmu->dev, "failed to enable gerror irq\n");
3223        } else {
3224                dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3225        }
3226
3227        if (smmu->features & ARM_SMMU_FEAT_PRI) {
3228                irq = smmu->priq.q.irq;
3229                if (irq) {
3230                        ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3231                                                        arm_smmu_priq_thread,
3232                                                        IRQF_ONESHOT,
3233                                                        "arm-smmu-v3-priq",
3234                                                        smmu);
3235                        if (ret < 0)
3236                                dev_warn(smmu->dev,
3237                                         "failed to enable priq irq\n");
3238                } else {
3239                        dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3240                }
3241        }
3242}
3243
3244static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3245{
3246        int ret, irq;
3247        u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3248
3249        /* Disable IRQs first */
3250        ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3251                                      ARM_SMMU_IRQ_CTRLACK);
3252        if (ret) {
3253                dev_err(smmu->dev, "failed to disable irqs\n");
3254                return ret;
3255        }
3256
3257        irq = smmu->combined_irq;
3258        if (irq) {
3259                /*
3260                 * Cavium ThunderX2 implementation doesn't support unique irq
3261                 * lines. Use a single irq line for all the SMMUv3 interrupts.
3262                 */
3263                ret = devm_request_threaded_irq(smmu->dev, irq,
3264                                        arm_smmu_combined_irq_handler,
3265                                        arm_smmu_combined_irq_thread,
3266                                        IRQF_ONESHOT,
3267                                        "arm-smmu-v3-combined-irq", smmu);
3268                if (ret < 0)
3269                        dev_warn(smmu->dev, "failed to enable combined irq\n");
3270        } else
3271                arm_smmu_setup_unique_irqs(smmu);
3272
3273        if (smmu->features & ARM_SMMU_FEAT_PRI)
3274                irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3275
3276        /* Enable interrupt generation on the SMMU */
3277        ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3278                                      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3279        if (ret)
3280                dev_warn(smmu->dev, "failed to enable irqs\n");
3281
3282        return 0;
3283}
3284
3285static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3286{
3287        int ret;
3288
3289        ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3290        if (ret)
3291                dev_err(smmu->dev, "failed to clear cr0\n");
3292
3293        return ret;
3294}
3295
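/*
 * Bring the SMMU up from a known state: abort/disable any live instance,
 * program table and queue attributes, enable the command, event and PRI
 * queues in turn, invalidate cached configuration and TLBs, set up the IRQs
 * and finally either enable translation or leave the SMMU in bypass.
 */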
3296static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3297{
3298        int ret;
3299        u32 reg, enables;
3300        struct arm_smmu_cmdq_ent cmd;
3301
3302        /* Clear CR0 and sync (disables SMMU and queue processing) */
3303        reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3304        if (reg & CR0_SMMUEN) {
3305                dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3306                WARN_ON(is_kdump_kernel() && !disable_bypass);
3307                arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3308        }
3309
3310        ret = arm_smmu_device_disable(smmu);
3311        if (ret)
3312                return ret;
3313
3314        /* CR1 (table and queue memory attributes) */
3315        reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3316              FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3317              FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3318              FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3319              FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3320              FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3321        writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3322
3323        /* CR2 (PTM, invalid StreamID recording and, if supported, E2H) */
3324        reg = CR2_PTM | CR2_RECINVSID;
3325
3326        if (smmu->features & ARM_SMMU_FEAT_E2H)
3327                reg |= CR2_E2H;
3328
3329        writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3330
3331        /* Stream table */
3332        writeq_relaxed(smmu->strtab_cfg.strtab_base,
3333                       smmu->base + ARM_SMMU_STRTAB_BASE);
3334        writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3335                       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3336
3337        /* Command queue */
3338        writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3339        writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3340        writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3341
3342        enables = CR0_CMDQEN;
3343        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3344                                      ARM_SMMU_CR0ACK);
3345        if (ret) {
3346                dev_err(smmu->dev, "failed to enable command queue\n");
3347                return ret;
3348        }
3349
3350        /* Invalidate any cached configuration */
3351        cmd.opcode = CMDQ_OP_CFGI_ALL;
3352        arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3353
3354        /* Invalidate any stale TLB entries */
3355        if (smmu->features & ARM_SMMU_FEAT_HYP) {
3356                cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3357                arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3358        }
3359
3360        cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3361        arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3362
3363        /* Event queue */
3364        writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3365        writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3366        writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3367
3368        enables |= CR0_EVTQEN;
3369        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3370                                      ARM_SMMU_CR0ACK);
3371        if (ret) {
3372                dev_err(smmu->dev, "failed to enable event queue\n");
3373                return ret;
3374        }
3375
3376        /* PRI queue */
3377        if (smmu->features & ARM_SMMU_FEAT_PRI) {
3378                writeq_relaxed(smmu->priq.q.q_base,
3379                               smmu->base + ARM_SMMU_PRIQ_BASE);
3380                writel_relaxed(smmu->priq.q.llq.prod,
3381                               smmu->page1 + ARM_SMMU_PRIQ_PROD);
3382                writel_relaxed(smmu->priq.q.llq.cons,
3383                               smmu->page1 + ARM_SMMU_PRIQ_CONS);
3384
3385                enables |= CR0_PRIQEN;
3386                ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3387                                              ARM_SMMU_CR0ACK);
3388                if (ret) {
3389                        dev_err(smmu->dev, "failed to enable PRI queue\n");
3390                        return ret;
3391                }
3392        }
3393
3394        if (smmu->features & ARM_SMMU_FEAT_ATS) {
3395                enables |= CR0_ATSCHK;
3396                ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3397                                              ARM_SMMU_CR0ACK);
3398                if (ret) {
3399                        dev_err(smmu->dev, "failed to enable ATS check\n");
3400                        return ret;
3401                }
3402        }
3403
3404        ret = arm_smmu_setup_irqs(smmu);
3405        if (ret) {
3406                dev_err(smmu->dev, "failed to setup irqs\n");
3407                return ret;
3408        }
3409
3410        if (is_kdump_kernel())
3411                enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3412
3413        /* Enable the SMMU interface, or ensure bypass */
3414        if (!bypass || disable_bypass) {
3415                enables |= CR0_SMMUEN;
3416        } else {
3417                ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3418                if (ret)
3419                        return ret;
3420        }
3421        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3422                                      ARM_SMMU_CR0ACK);
3423        if (ret) {
3424                dev_err(smmu->dev, "failed to enable SMMU interface\n");
3425                return ret;
3426        }
3427
3428        return 0;
3429}
3430
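/*
 * Read the ID registers (IDR0/1/3/5) to discover which optional features,
 * queue and table sizes, page sizes and address widths this implementation
 * supports, recording the result in smmu->features and arm_smmu_ops.
 */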
3431static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3432{
3433        u32 reg;
3434        bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3435
3436        /* IDR0 */
3437        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3438
3439        /* 2-level structures */
3440        if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3441                smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3442
3443        if (reg & IDR0_CD2L)
3444                smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3445
3446        /*
3447         * Translation table endianness.
3448         * We currently require the same endianness as the CPU, but this
3449         * could be changed later by adding a new IO_PGTABLE_QUIRK.
3450         */
3451        switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3452        case IDR0_TTENDIAN_MIXED:
3453                smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3454                break;
3455#ifdef __BIG_ENDIAN
3456        case IDR0_TTENDIAN_BE:
3457                smmu->features |= ARM_SMMU_FEAT_TT_BE;
3458                break;
3459#else
3460        case IDR0_TTENDIAN_LE:
3461                smmu->features |= ARM_SMMU_FEAT_TT_LE;
3462                break;
3463#endif
3464        default:
3465                dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3466                return -ENXIO;
3467        }
3468
3469        /* Boolean feature flags */
3470        if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3471                smmu->features |= ARM_SMMU_FEAT_PRI;
3472
3473        if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3474                smmu->features |= ARM_SMMU_FEAT_ATS;
3475
3476        if (reg & IDR0_SEV)
3477                smmu->features |= ARM_SMMU_FEAT_SEV;
3478
3479        if (reg & IDR0_MSI) {
3480                smmu->features |= ARM_SMMU_FEAT_MSI;
3481                if (coherent && !disable_msipolling)
3482                        smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3483        }
3484
3485        if (reg & IDR0_HYP) {
3486                smmu->features |= ARM_SMMU_FEAT_HYP;
3487                if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3488                        smmu->features |= ARM_SMMU_FEAT_E2H;
3489        }
3490
3491        /*
3492         * The coherency feature as set by FW is used in preference to the ID
3493         * register, but warn on mismatch.
3494         */
3495        if (!!(reg & IDR0_COHACC) != coherent)
3496                dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3497                         coherent ? "true" : "false");
3498
3499        switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3500        case IDR0_STALL_MODEL_FORCE:
3501                smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3502                fallthrough;
3503        case IDR0_STALL_MODEL_STALL:
3504                smmu->features |= ARM_SMMU_FEAT_STALLS;
3505        }
3506
3507        if (reg & IDR0_S1P)
3508                smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3509
3510        if (reg & IDR0_S2P)
3511                smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3512
3513        if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3514                dev_err(smmu->dev, "no translation support!\n");
3515                return -ENXIO;
3516        }
3517
3518        /* We only support the AArch64 table format at present */
3519        switch (FIELD_GET(IDR0_TTF, reg)) {
3520        case IDR0_TTF_AARCH32_64:
3521                smmu->ias = 40;
3522                fallthrough;
3523        case IDR0_TTF_AARCH64:
3524                break;
3525        default:
3526                dev_err(smmu->dev, "AArch64 table format not supported!\n");
3527                return -ENXIO;
3528        }
3529
3530        /* ASID/VMID sizes */
3531        smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3532        smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3533
3534        /* IDR1 */
3535        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3536        if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3537                dev_err(smmu->dev, "embedded implementation not supported\n");
3538                return -ENXIO;
3539        }
3540
3541        /* Queue sizes, capped to ensure natural alignment */
3542        smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3543                                             FIELD_GET(IDR1_CMDQS, reg));
3544        if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3545                /*
3546                 * We don't support splitting up batches, so one batch of
3547                 * commands plus an extra sync needs to fit inside the command
3548                 * queue. There's also no way we can handle the weird alignment
3549                 * restrictions on the base pointer for a unit-length queue.
3550                 */
3551                dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3552                        CMDQ_BATCH_ENTRIES);
3553                return -ENXIO;
3554        }
3555
3556        smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3557                                             FIELD_GET(IDR1_EVTQS, reg));
3558        smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3559                                             FIELD_GET(IDR1_PRIQS, reg));
3560
3561        /* SID/SSID sizes */
3562        smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3563        smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3564
3565        /*
3566         * If the SMMU supports fewer bits than would fill a single L2 stream
3567         * table, use a linear table instead.
3568         */
3569        if (smmu->sid_bits <= STRTAB_SPLIT)
3570                smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3571
3572        /* IDR3 */
3573        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3574        if (FIELD_GET(IDR3_RIL, reg))
3575                smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3576
3577        /* IDR5 */
3578        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3579
3580        /* Maximum number of outstanding stalls */
3581        smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3582
3583        /* Page sizes */
3584        if (reg & IDR5_GRAN64K)
3585                smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3586        if (reg & IDR5_GRAN16K)
3587                smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3588        if (reg & IDR5_GRAN4K)
3589                smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3590
3591        /* Input address size */
3592        if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3593                smmu->features |= ARM_SMMU_FEAT_VAX;
3594
3595        /* Output address size */
3596        switch (FIELD_GET(IDR5_OAS, reg)) {
3597        case IDR5_OAS_32_BIT:
3598                smmu->oas = 32;
3599                break;
3600        case IDR5_OAS_36_BIT:
3601                smmu->oas = 36;
3602                break;
3603        case IDR5_OAS_40_BIT:
3604                smmu->oas = 40;
3605                break;
3606        case IDR5_OAS_42_BIT:
3607                smmu->oas = 42;
3608                break;
3609        case IDR5_OAS_44_BIT:
3610                smmu->oas = 44;
3611                break;
3612        case IDR5_OAS_52_BIT:
3613                smmu->oas = 52;
3614                smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3615                break;
3616        default:
3617                dev_info(smmu->dev,
3618                        "unknown output address size. Truncating to 48-bit\n");
3619                fallthrough;
3620        case IDR5_OAS_48_BIT:
3621                smmu->oas = 48;
3622        }
3623
3624        if (arm_smmu_ops.pgsize_bitmap == -1UL)
3625                arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3626        else
3627                arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3628
3629        /* Set the DMA mask for our table walker */
3630        if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3631                dev_warn(smmu->dev,
3632                         "failed to set DMA mask for table walker\n");
3633
3634        smmu->ias = max(smmu->ias, smmu->oas);
3635
3636        if (arm_smmu_sva_supported(smmu))
3637                smmu->features |= ARM_SMMU_FEAT_SVA;
3638
3639        dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3640                 smmu->ias, smmu->oas, smmu->features);
3641        return 0;
3642}
3643
3644#ifdef CONFIG_ACPI
3645static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3646{
3647        switch (model) {
3648        case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3649                smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3650                break;
3651        case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3652                smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3653                break;
3654        }
3655
3656        dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3657}
3658
3659static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3660                                      struct arm_smmu_device *smmu)
3661{
3662        struct acpi_iort_smmu_v3 *iort_smmu;
3663        struct device *dev = smmu->dev;
3664        struct acpi_iort_node *node;
3665
3666        node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3667
3668        /* Retrieve SMMUv3 specific data */
3669        iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3670
3671        acpi_smmu_get_options(iort_smmu->model, smmu);
3672
3673        if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3674                smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3675
3676        return 0;
3677}
3678#else
3679static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3680                                             struct arm_smmu_device *smmu)
3681{
3682        return -ENODEV;
3683}
3684#endif
3685
3686static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3687                                    struct arm_smmu_device *smmu)
3688{
3689        struct device *dev = &pdev->dev;
3690        u32 cells;
3691        int ret = -EINVAL;
3692
3693        if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3694                dev_err(dev, "missing #iommu-cells property\n");
3695        else if (cells != 1)
3696                dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3697        else
3698                ret = 0;
3699
3700        parse_driver_options(smmu);
3701
3702        if (of_dma_is_coherent(dev->of_node))
3703                smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3704
3705        return ret;
3706}
3707
3708static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3709{
3710        if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3711                return SZ_64K;
3712        else
3713                return SZ_128K;
3714}
3715
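/*
 * Install (or, when called with NULL, remove) the SMMUv3 iommu_ops on each
 * bus type that may sit behind the SMMU: PCI, AMBA and the platform bus.
 * A failure part-way through unwinds any ops already installed.
 */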
3716static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3717{
3718        int err;
3719
3720#ifdef CONFIG_PCI
3721        if (pci_bus_type.iommu_ops != ops) {
3722                err = bus_set_iommu(&pci_bus_type, ops);
3723                if (err)
3724                        return err;
3725        }
3726#endif
3727#ifdef CONFIG_ARM_AMBA
3728        if (amba_bustype.iommu_ops != ops) {
3729                err = bus_set_iommu(&amba_bustype, ops);
3730                if (err)
3731                        goto err_reset_pci_ops;
3732        }
3733#endif
3734        if (platform_bus_type.iommu_ops != ops) {
3735                err = bus_set_iommu(&platform_bus_type, ops);
3736                if (err)
3737                        goto err_reset_amba_ops;
3738        }
3739
3740        return 0;
3741
3742err_reset_amba_ops:
3743#ifdef CONFIG_ARM_AMBA
3744        bus_set_iommu(&amba_bustype, NULL);
3745#endif
3746err_reset_pci_ops: __maybe_unused;
3747#ifdef CONFIG_PCI
3748        bus_set_iommu(&pci_bus_type, NULL);
3749#endif
3750        return err;
3751}
3752
3753static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3754                                      resource_size_t size)
3755{
3756        struct resource res = DEFINE_RES_MEM(start, size);
3757
3758        return devm_ioremap_resource(dev, &res);
3759}
3760
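/*
 * Main probe path: gather firmware configuration (DT or ACPI/IORT), map the
 * register pages, probe the hardware features, allocate the in-memory
 * structures, reset the device and finally register with the IOMMU core and
 * the relevant bus types.
 */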
3761static int arm_smmu_device_probe(struct platform_device *pdev)
3762{
3763        int irq, ret;
3764        struct resource *res;
3765        resource_size_t ioaddr;
3766        struct arm_smmu_device *smmu;
3767        struct device *dev = &pdev->dev;
3768        bool bypass;
3769
3770        smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3771        if (!smmu)
3772                return -ENOMEM;
3773        smmu->dev = dev;
3774
3775        if (dev->of_node) {
3776                ret = arm_smmu_device_dt_probe(pdev, smmu);
3777        } else {
3778                ret = arm_smmu_device_acpi_probe(pdev, smmu);
3779                if (ret == -ENODEV)
3780                        return ret;
3781        }
3782
3783        /* Set bypass mode according to firmware probing result */
3784        bypass = !!ret;
3785
3786        /* Base address */
3787        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3788        if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3789                dev_err(dev, "MMIO region too small (%pr)\n", res);
3790                return -EINVAL;
3791        }
3792        ioaddr = res->start;
3793
3794        /*
3795         * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3796         * the PMCG registers which are reserved by the PMU driver.
3797         */
3798        smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3799        if (IS_ERR(smmu->base))
3800                return PTR_ERR(smmu->base);
3801
3802        if (arm_smmu_resource_size(smmu) > SZ_64K) {
3803                smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3804                                               ARM_SMMU_REG_SZ);
3805                if (IS_ERR(smmu->page1))
3806                        return PTR_ERR(smmu->page1);
3807        } else {
3808                smmu->page1 = smmu->base;
3809        }
3810
3811        /* Interrupt lines */
3812
3813        irq = platform_get_irq_byname_optional(pdev, "combined");
3814        if (irq > 0)
3815                smmu->combined_irq = irq;
3816        else {
3817                irq = platform_get_irq_byname_optional(pdev, "eventq");
3818                if (irq > 0)
3819                        smmu->evtq.q.irq = irq;
3820
3821                irq = platform_get_irq_byname_optional(pdev, "priq");
3822                if (irq > 0)
3823                        smmu->priq.q.irq = irq;
3824
3825                irq = platform_get_irq_byname_optional(pdev, "gerror");
3826                if (irq > 0)
3827                        smmu->gerr_irq = irq;
3828        }
3829        /* Probe the h/w */
3830        ret = arm_smmu_device_hw_probe(smmu);
3831        if (ret)
3832                return ret;
3833
3834        /* Initialise in-memory data structures */
3835        ret = arm_smmu_init_structures(smmu);
3836        if (ret)
3837                return ret;
3838
3839        /* Record our private device structure */
3840        platform_set_drvdata(pdev, smmu);
3841
3842        /* Reset the device */
3843        ret = arm_smmu_device_reset(smmu, bypass);
3844        if (ret)
3845                return ret;
3846
3847        /* And we're up. Go go go! */
3848        ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3849                                     "smmu3.%pa", &ioaddr);
3850        if (ret)
3851                return ret;
3852
3853        ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3854        if (ret) {
3855                dev_err(dev, "Failed to register iommu\n");
3856                goto err_sysfs_remove;
3857        }
3858
3859        ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3860        if (ret)
3861                goto err_unregister_device;
3862
3863        return 0;
3864
3865err_unregister_device:
3866        iommu_device_unregister(&smmu->iommu);
3867err_sysfs_remove:
3868        iommu_device_sysfs_remove(&smmu->iommu);
3869        return ret;
3870}
3871
3872static int arm_smmu_device_remove(struct platform_device *pdev)
3873{
3874        struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3875
3876        arm_smmu_set_bus_ops(NULL);
3877        iommu_device_unregister(&smmu->iommu);
3878        iommu_device_sysfs_remove(&smmu->iommu);
3879        arm_smmu_device_disable(smmu);
3880        iopf_queue_free(smmu->evtq.iopf);
3881
3882        return 0;
3883}
3884
3885static void arm_smmu_device_shutdown(struct platform_device *pdev)
3886{
3887        arm_smmu_device_remove(pdev);
3888}
3889
3890static const struct of_device_id arm_smmu_of_match[] = {
3891        { .compatible = "arm,smmu-v3", },
3892        { },
3893};
3894MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3895
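/*
 * Wait for any outstanding SVA mmu_notifiers to be released before the
 * module text (which provides their callbacks) can go away.
 */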
3896static void arm_smmu_driver_unregister(struct platform_driver *drv)
3897{
3898        arm_smmu_sva_notifier_synchronize();
3899        platform_driver_unregister(drv);
3900}
3901
3902static struct platform_driver arm_smmu_driver = {
3903        .driver = {
3904                .name                   = "arm-smmu-v3",
3905                .of_match_table         = arm_smmu_of_match,
3906                .suppress_bind_attrs    = true,
3907        },
3908        .probe  = arm_smmu_device_probe,
3909        .remove = arm_smmu_device_remove,
3910        .shutdown = arm_smmu_device_shutdown,
3911};
3912module_driver(arm_smmu_driver, platform_driver_register,
3913              arm_smmu_driver_unregister);
3914
3915MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3916MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3917MODULE_ALIAS("platform:arm-smmu-v3");
3918MODULE_LICENSE("GPL v2");
3919