linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
   3 */
   4
   5#include <linux/kernel.h>
   6#include <linux/types.h>
   7#include <linux/cpumask.h>
   8#include <linux/qcom_scm.h>
   9#include <linux/pm_opp.h>
  10#include <linux/nvmem-consumer.h>
  11#include <linux/slab.h>
  12#include "msm_gem.h"
  13#include "msm_mmu.h"
  14#include "a5xx_gpu.h"
  15
  16extern bool hang_debug;
  17static void a5xx_dump(struct msm_gpu *gpu);
  18
  19#define GPU_PAS_ID 13
  20
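/*
 * Kick the CP: publish the ringbuffer's software write pointer to the
 * hardware WPTR register. The write is skipped if this ring is not the
 * current one or a preemption is in flight; in that case the preemption
 * code is presumably responsible for reprogramming WPTR once it switches
 * rings.
 */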
  21static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  22{
  23        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  24        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  25        uint32_t wptr;
  26        unsigned long flags;
  27
  28        spin_lock_irqsave(&ring->lock, flags);
  29
  30        /* Copy the shadow to the actual register */
  31        ring->cur = ring->next;
  32
  33        /* Make sure to wrap wptr if we need to */
  34        wptr = get_wptr(ring);
  35
  36        spin_unlock_irqrestore(&ring->lock, flags);
  37
  38        /* Make sure everything is posted before making a decision */
  39        mb();
  40
  41        /* Update HW if this is the current ring and we are not in preempt */
  42        if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
  43                gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
  44}
  45
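/*
 * Debug-only submit path (CONFIG_DRM_MSM_GPU_SUDO): instead of emitting
 * indirect-buffer packets, copy the userspace command stream directly
 * into the ringbuffer, then wait for the GPU to idle and retire the
 * fence by hand since no CACHE_FLUSH_TS event is emitted here.
 */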
  46static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
  47        struct msm_file_private *ctx)
  48{
  49        struct msm_drm_private *priv = gpu->dev->dev_private;
  50        struct msm_ringbuffer *ring = submit->ring;
  51        struct msm_gem_object *obj;
  52        uint32_t *ptr, dwords;
   53        unsigned int i, j;
  54
  55        for (i = 0; i < submit->nr_cmds; i++) {
  56                switch (submit->cmd[i].type) {
  57                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
  58                        break;
  59                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
  60                        if (priv->lastctx == ctx)
  61                                break;
  62                        /* fall-thru */
  63                case MSM_SUBMIT_CMD_BUF:
  64                        /* copy commands into RB: */
  65                        obj = submit->bos[submit->cmd[i].idx].obj;
  66                        dwords = submit->cmd[i].size;
  67
  68                        ptr = msm_gem_get_vaddr(&obj->base);
  69
  70                        /* _get_vaddr() shouldn't fail at this point,
  71                         * since we've already mapped it once in
  72                         * submit_reloc()
  73                         */
  74                        if (WARN_ON(!ptr))
  75                                return;
  76
   77                        for (j = 0; j < dwords; j++) {
  78                                /* normally the OUT_PKTn() would wait
  79                                 * for space for the packet.  But since
  80                                 * we just OUT_RING() the whole thing,
  81                                 * need to call adreno_wait_ring()
  82                                 * ourself:
  83                                 */
  84                                adreno_wait_ring(ring, 1);
   85                                OUT_RING(ring, ptr[j]);
  86                        }
  87
  88                        msm_gem_put_vaddr(&obj->base);
  89
  90                        break;
  91                }
  92        }
  93
  94        a5xx_flush(gpu, ring);
  95        a5xx_preempt_trigger(gpu);
  96
  97        /* we might not necessarily have a cmd from userspace to
  98         * trigger an event to know that submit has completed, so
  99         * do this manually:
 100         */
 101        a5xx_idle(gpu, ring);
 102        ring->memptrs->fence = submit->seqno;
 103        msm_gpu_retire(gpu);
 104}
 105
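/*
 * Normal submit path: program the preemption save record for this ring,
 * emit CP_INDIRECT_BUFFER_PFE packets for each command buffer, write the
 * fence through a CACHE_FLUSH_TS event (which also raises an interrupt),
 * and finally yield the floor so a pending preemption can take over.
 */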
 106static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 107        struct msm_file_private *ctx)
 108{
 109        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 110        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 111        struct msm_drm_private *priv = gpu->dev->dev_private;
 112        struct msm_ringbuffer *ring = submit->ring;
 113        unsigned int i, ibs = 0;
 114
 115        if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
 116                priv->lastctx = NULL;
 117                a5xx_submit_in_rb(gpu, submit, ctx);
 118                return;
 119        }
 120
 121        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 122        OUT_RING(ring, 0x02);
 123
 124        /* Turn off protected mode to write to special registers */
 125        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 126        OUT_RING(ring, 0);
 127
 128        /* Set the save preemption record for the ring/command */
 129        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 130        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 131        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 132
 133        /* Turn back on protected mode */
 134        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 135        OUT_RING(ring, 1);
 136
 137        /* Enable local preemption for finegrain preemption */
  138        OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
  139        OUT_RING(ring, 0x1);
 140
 141        /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 142        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 143        OUT_RING(ring, 0x02);
 144
 145        /* Submit the commands */
 146        for (i = 0; i < submit->nr_cmds; i++) {
 147                switch (submit->cmd[i].type) {
 148                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 149                        break;
 150                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 151                        if (priv->lastctx == ctx)
 152                                break;
 153                        /* fall-thru */
 154                case MSM_SUBMIT_CMD_BUF:
 155                        OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 156                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 157                        OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 158                        OUT_RING(ring, submit->cmd[i].size);
 159                        ibs++;
 160                        break;
 161                }
 162        }
 163
 164        /*
 165         * Write the render mode to NULL (0) to indicate to the CP that the IBs
 166         * are done rendering - otherwise a lucky preemption would start
 167         * replaying from the last checkpoint
 168         */
 169        OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
 170        OUT_RING(ring, 0);
 171        OUT_RING(ring, 0);
 172        OUT_RING(ring, 0);
 173        OUT_RING(ring, 0);
 174        OUT_RING(ring, 0);
 175
 176        /* Turn off IB level preemptions */
 177        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 178        OUT_RING(ring, 0x01);
 179
 180        /* Write the fence to the scratch register */
 181        OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
 182        OUT_RING(ring, submit->seqno);
 183
 184        /*
 185         * Execute a CACHE_FLUSH_TS event. This will ensure that the
 186         * timestamp is written to the memory and then triggers the interrupt
 187         */
 188        OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 189        OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
 190        OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 191        OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 192        OUT_RING(ring, submit->seqno);
 193
 194        /* Yield the floor on command completion */
 195        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 196        /*
 197         * If dword[2:1] are non zero, they specify an address for the CP to
 198         * write the value of dword[3] to on preemption complete. Write 0 to
 199         * skip the write
 200         */
 201        OUT_RING(ring, 0x00);
 202        OUT_RING(ring, 0x00);
 203        /* Data value - not used if the address above is 0 */
 204        OUT_RING(ring, 0x01);
 205        /* Set bit 0 to trigger an interrupt on preempt complete */
 206        OUT_RING(ring, 0x01);
 207
 208        a5xx_flush(gpu, ring);
 209
 210        /* Check to see if we need to start preemption */
 211        a5xx_preempt_trigger(gpu);
 212}
 213
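/*
 * Per-block hardware clock gating (HWCG) register settings, applied (or
 * zeroed) by a5xx_set_hwcg() below.
 */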
 214static const struct {
 215        u32 offset;
 216        u32 value;
 217} a5xx_hwcg[] = {
 218        {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 219        {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 220        {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
 221        {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
 222        {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 223        {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
 224        {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
 225        {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
 226        {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 227        {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
 228        {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
 229        {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
 230        {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 231        {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 232        {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 233        {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 234        {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 235        {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
 236        {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
 237        {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
 238        {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 239        {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 240        {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 241        {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 242        {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 243        {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
 244        {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
 245        {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
 246        {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 247        {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 248        {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 249        {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 250        {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 251        {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 252        {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 253        {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 254        {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 255        {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
 256        {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
 257        {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
 258        {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 259        {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 260        {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 261        {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 262        {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 263        {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 264        {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 265        {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 266        {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 267        {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
 268        {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
 269        {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
 270        {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 271        {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 272        {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 273        {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 274        {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
 275        {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 276        {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 277        {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 278        {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 279        {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 280        {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 281        {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
 282        {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
 283        {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
 284        {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 285        {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
 286        {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
 287        {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
 288        {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 289        {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 290        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 291        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
 292        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
 293        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
 294        {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 295        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 296        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
 297        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
 298        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
 299        {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 300        {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 301        {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 302        {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 303        {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 304        {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 305        {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 306        {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 307        {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 308        {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 309        {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 310};
 311
 312void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 313{
 314        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 315        unsigned int i;
 316
 317        for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
 318                gpu_write(gpu, a5xx_hwcg[i].offset,
 319                        state ? a5xx_hwcg[i].value : 0);
 320
 321        if (adreno_is_a540(adreno_gpu)) {
 322                gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
 323                gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
 324        }
 325
 326        gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 327        gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 328}
 329
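/*
 * Send the one-time CP_ME_INIT packet to the microengine and wait for the
 * ring to go idle before continuing with initialization.
 */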
 330static int a5xx_me_init(struct msm_gpu *gpu)
 331{
 332        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 333        struct msm_ringbuffer *ring = gpu->rb[0];
 334
 335        OUT_PKT7(ring, CP_ME_INIT, 8);
 336
 337        OUT_RING(ring, 0x0000002F);
 338
 339        /* Enable multiple hardware contexts */
 340        OUT_RING(ring, 0x00000003);
 341
 342        /* Enable error detection */
 343        OUT_RING(ring, 0x20000000);
 344
 345        /* Don't enable header dump */
 346        OUT_RING(ring, 0x00000000);
 347        OUT_RING(ring, 0x00000000);
 348
 349        /* Specify workarounds for various microcode issues */
 350        if (adreno_is_a530(adreno_gpu)) {
 351                /* Workaround for token end syncs
 352                 * Force a WFI after every direct-render 3D mode draw and every
 353                 * 2D mode 3 draw
 354                 */
 355                OUT_RING(ring, 0x0000000B);
 356        } else {
 357                /* No workarounds enabled */
 358                OUT_RING(ring, 0x00000000);
 359        }
 360
 361        OUT_RING(ring, 0x00000000);
 362        OUT_RING(ring, 0x00000000);
 363
 364        gpu->funcs->flush(gpu, ring);
 365        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 366}
 367
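/*
 * Prime ring 0 with its preemption save record and an initial yield so the
 * first real preemption has valid state to switch away from. Only needed
 * when more than one ringbuffer is in use.
 */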
 368static int a5xx_preempt_start(struct msm_gpu *gpu)
 369{
 370        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 371        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 372        struct msm_ringbuffer *ring = gpu->rb[0];
 373
 374        if (gpu->nr_rings == 1)
 375                return 0;
 376
 377        /* Turn off protected mode to write to special registers */
 378        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 379        OUT_RING(ring, 0);
 380
 381        /* Set the save preemption record for the ring/command */
 382        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 383        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 384        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 385
 386        /* Turn back on protected mode */
 387        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 388        OUT_RING(ring, 1);
 389
 390        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 391        OUT_RING(ring, 0x00);
 392
 393        OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 394        OUT_RING(ring, 0x01);
 395
 396        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 397        OUT_RING(ring, 0x01);
 398
 399        /* Yield the floor on command completion */
 400        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 401        OUT_RING(ring, 0x00);
 402        OUT_RING(ring, 0x00);
 403        OUT_RING(ring, 0x01);
 404        OUT_RING(ring, 0x01);
 405
 406        gpu->funcs->flush(gpu, ring);
 407
 408        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 409}
 410
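/*
 * Pin the PM4 and PFP microcode into GEM buffers (allocated once and kept
 * across resets) and point the CP instruction base registers at their GPU
 * addresses.
 */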
 411static int a5xx_ucode_init(struct msm_gpu *gpu)
 412{
 413        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 414        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 415        int ret;
 416
 417        if (!a5xx_gpu->pm4_bo) {
 418                a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
 419                        adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
 420
 421
 422                if (IS_ERR(a5xx_gpu->pm4_bo)) {
 423                        ret = PTR_ERR(a5xx_gpu->pm4_bo);
 424                        a5xx_gpu->pm4_bo = NULL;
 425                        DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
 426                                ret);
 427                        return ret;
 428                }
 429
 430                msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
 431        }
 432
 433        if (!a5xx_gpu->pfp_bo) {
 434                a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
 435                        adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
 436
 437                if (IS_ERR(a5xx_gpu->pfp_bo)) {
 438                        ret = PTR_ERR(a5xx_gpu->pfp_bo);
 439                        a5xx_gpu->pfp_bo = NULL;
 440                        DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
 441                                ret);
 442                        return ret;
 443                }
 444
 445                msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
 446        }
 447
 448        gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
 449                REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
 450
 451        gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
 452                REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
 453
 454        return 0;
 455}
 456
 457#define SCM_GPU_ZAP_SHADER_RESUME 0
 458
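/*
 * Ask the secure world, via an SCM call, to reinitialize the zap shader
 * once it has already been loaded; see a5xx_zap_shader_init() below.
 */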
 459static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 460{
 461        int ret;
 462
 463        ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
 464        if (ret)
 465                DRM_ERROR("%s: zap-shader resume failed: %d\n",
 466                        gpu->name, ret);
 467
 468        return ret;
 469}
 470
 471static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 472{
 473        static bool loaded;
 474        int ret;
 475
 476        /*
 477         * If the zap shader is already loaded into memory we just need to kick
 478         * the remote processor to reinitialize it
 479         */
 480        if (loaded)
 481                return a5xx_zap_shader_resume(gpu);
 482
 483        ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
 484
 485        loaded = !ret;
 486        return ret;
 487}
 488
 489#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 490          A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 491          A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 492          A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 493          A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 494          A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
 495          A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 496          A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
 497          A5XX_RBBM_INT_0_MASK_CP_SW | \
 498          A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 499          A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 500          A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
 501
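/*
 * Bring the GPU hardware up from scratch: program VBIF/UCHE/CP setup
 * registers, enable clock gating and the CP protected-register ranges,
 * load the microcode, start the microengine, initialize power features and
 * preemption, and finally drop out of secure mode (via the zap shader if
 * one is available).
 */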
 502static int a5xx_hw_init(struct msm_gpu *gpu)
 503{
 504        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 505        int ret;
 506
 507        gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 508
 509        if (adreno_is_a540(adreno_gpu))
 510                gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
 511
 512        /* Make all blocks contribute to the GPU BUSY perf counter */
 513        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
 514
 515        /* Enable RBBM error reporting bits */
 516        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
 517
 518        if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
 519                /*
 520                 * Mask out the activity signals from RB1-3 to avoid false
 521                 * positives
 522                 */
 523
 524                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
 525                        0xF0000000);
 526                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
 527                        0xFFFFFFFF);
 528                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
 529                        0xFFFFFFFF);
 530                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
 531                        0xFFFFFFFF);
 532                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
 533                        0xFFFFFFFF);
 534                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
 535                        0xFFFFFFFF);
 536                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
 537                        0xFFFFFFFF);
 538                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
 539                        0xFFFFFFFF);
 540        }
 541
 542        /* Enable fault detection */
 543        gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
 544                (1 << 30) | 0xFFFF);
 545
 546        /* Turn on performance counters */
 547        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
 548
 549        /* Select CP0 to always count cycles */
 550        gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
 551
  552        /* Select countable 6 for RBBM counter 0 to get the busy status for devfreq */
 553        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
 554
 555        /* Increase VFD cache access so LRZ and other data gets evicted less */
 556        gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 557
 558        /* Disable L2 bypass in the UCHE */
 559        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
 560        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
 561        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
 562        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
 563
 564        /* Set the GMEM VA range (0 to gpu->gmem) */
 565        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 566        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
 567        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
 568                0x00100000 + adreno_gpu->gmem - 1);
 569        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 570
 571        gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
 572        if (adreno_is_a530(adreno_gpu))
 573                gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
 574        if (adreno_is_a540(adreno_gpu))
 575                gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
 576        gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
 577        gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
 578
 579        gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
 580
 581        if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 582                gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 583
 584        gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
 585
 586        /* Enable USE_RETENTION_FLOPS */
 587        gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
 588
 589        /* Enable ME/PFP split notification */
 590        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 591
 592        /* Enable HWCG */
 593        a5xx_set_hwcg(gpu, true);
 594
 595        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 596
 597        /* Set the highest bank bit */
 598        gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
 599        gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
 600        if (adreno_is_a540(adreno_gpu))
 601                gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
 602
 603        /* Protect registers from the CP */
 604        gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
 605
 606        /* RBBM */
 607        gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
 608        gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
 609        gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
 610        gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
 611        gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
 612        gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
 613
 614        /* Content protect */
 615        gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
 616                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 617                        16));
 618        gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
 619                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
 620
 621        /* CP */
 622        gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
 623        gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
 624        gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
 625        gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
 626
 627        /* RB */
 628        gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
 629        gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
 630
 631        /* VPC */
 632        gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
 633        gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
 634
 635        /* UCHE */
 636        gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 637
 638        if (adreno_is_a530(adreno_gpu))
 639                gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 640                        ADRENO_PROTECT_RW(0x10000, 0x8000));
 641
 642        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
 643        /*
  644         * Disable the trusted memory range - we don't actually support secure
 645         * memory rendering at this point in time and we don't want to block off
 646         * part of the virtual memory space.
 647         */
 648        gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 649                REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 650        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 651
 652        /* Put the GPU into 64 bit by default */
 653        gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
 654        gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
 655        gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
 656        gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
 657        gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
 658        gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
 659        gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
 660        gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
 661        gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
 662        gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
 663        gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
 664        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
 665
 666        /*
  667         * A VPC corner case with local memory load kill can corrupt internal
  668         * state. The normal disable does not work on all a5xx chips, so use
  669         * the following settings to disable it.
 670         */
 671        if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
 672                gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
 673                gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
 674        }
 675
 676        ret = adreno_hw_init(gpu);
 677        if (ret)
 678                return ret;
 679
 680        a5xx_preempt_hw_init(gpu);
 681
 682        a5xx_gpmu_ucode_init(gpu);
 683
 684        ret = a5xx_ucode_init(gpu);
 685        if (ret)
 686                return ret;
 687
  688        /* Enable the interrupts we care about through the initial bringup stage */
 689        gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
 690
 691        /* Clear ME_HALT to start the micro engine */
 692        gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
 693        ret = a5xx_me_init(gpu);
 694        if (ret)
 695                return ret;
 696
 697        ret = a5xx_power_init(gpu);
 698        if (ret)
 699                return ret;
 700
 701        /*
 702         * Send a pipeline event stat to get misbehaving counters to start
 703         * ticking correctly
 704         */
 705        if (adreno_is_a530(adreno_gpu)) {
 706                OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
 707                OUT_RING(gpu->rb[0], 0x0F);
 708
 709                gpu->funcs->flush(gpu, gpu->rb[0]);
 710                if (!a5xx_idle(gpu, gpu->rb[0]))
 711                        return -EINVAL;
 712        }
 713
 714        /*
 715         * Try to load a zap shader into the secure world. If successful
 716         * we can use the CP to switch out of secure mode. If not then we
  717         * have no recourse but to try to switch ourselves out manually. If we
 718         * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
 719         * be blocked and a permissions violation will soon follow.
 720         */
 721        ret = a5xx_zap_shader_init(gpu);
 722        if (!ret) {
 723                OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 724                OUT_RING(gpu->rb[0], 0x00000000);
 725
 726                gpu->funcs->flush(gpu, gpu->rb[0]);
 727                if (!a5xx_idle(gpu, gpu->rb[0]))
 728                        return -EINVAL;
 729        } else {
  730                /* Print a warning so that if we die, we know why */
 731                dev_warn_once(gpu->dev->dev,
 732                        "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 733                gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
 734        }
 735
 736        /* Last step - yield the ringbuffer */
 737        a5xx_preempt_start(gpu);
 738
 739        return 0;
 740}
 741
 742static void a5xx_recover(struct msm_gpu *gpu)
 743{
 744        int i;
 745
 746        adreno_dump_info(gpu);
 747
 748        for (i = 0; i < 8; i++) {
 749                printk("CP_SCRATCH_REG%d: %u\n", i,
 750                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
 751        }
 752
 753        if (hang_debug)
 754                a5xx_dump(gpu);
 755
 756        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
 757        gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
 758        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
 759        adreno_recover(gpu);
 760}
 761
 762static void a5xx_destroy(struct msm_gpu *gpu)
 763{
 764        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 765        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 766
 767        DBG("%s", gpu->name);
 768
 769        a5xx_preempt_fini(gpu);
 770
 771        if (a5xx_gpu->pm4_bo) {
 772                msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
 773                drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
 774        }
 775
 776        if (a5xx_gpu->pfp_bo) {
 777                msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
 778                drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
 779        }
 780
 781        if (a5xx_gpu->gpmu_bo) {
 782                msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
 783                drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
 784        }
 785
 786        adreno_gpu_cleanup(adreno_gpu);
 787        kfree(a5xx_gpu);
 788}
 789
 790static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
 791{
 792        if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
 793                return false;
 794
 795        /*
 796         * Nearly every abnormality ends up pausing the GPU and triggering a
 797         * fault so we can safely just watch for this one interrupt to fire
 798         */
 799        return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
 800                A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
 801}
 802
 803bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 804{
 805        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 806        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 807
 808        if (ring != a5xx_gpu->cur_ring) {
 809                WARN(1, "Tried to idle a non-current ringbuffer\n");
 810                return false;
 811        }
 812
 813        /* wait for CP to drain ringbuffer: */
 814        if (!adreno_idle(gpu, ring))
 815                return false;
 816
 817        if (spin_until(_a5xx_check_idle(gpu))) {
 818                DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
 819                        gpu->name, __builtin_return_address(0),
 820                        gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 821                        gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
 822                        gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 823                        gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
 824                return false;
 825        }
 826
 827        return true;
 828}
 829
 830static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
 831{
 832        struct msm_gpu *gpu = arg;
 833        pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
 834                        iova, flags,
 835                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
 836                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
 837                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
 838                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
 839
 840        return -EFAULT;
 841}
 842
 843static void a5xx_cp_err_irq(struct msm_gpu *gpu)
 844{
 845        u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
 846
 847        if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
 848                u32 val;
 849
 850                gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
 851
 852                /*
 853                 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
 854                 * read it twice
 855                 */
 856
 857                gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 858                val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 859
 860                dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
 861                        val);
 862        }
 863
 864        if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
 865                dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
 866                        gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
 867
 868        if (status & A5XX_CP_INT_CP_DMA_ERROR)
 869                dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
 870
 871        if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
 872                u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
 873
 874                dev_err_ratelimited(gpu->dev->dev,
 875                        "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
 876                        val & (1 << 24) ? "WRITE" : "READ",
 877                        (val & 0xFFFFF) >> 2, val);
 878        }
 879
 880        if (status & A5XX_CP_INT_CP_AHB_ERROR) {
 881                u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
 882                const char *access[16] = { "reserved", "reserved",
 883                        "timestamp lo", "timestamp hi", "pfp read", "pfp write",
 884                        "", "", "me read", "me write", "", "", "crashdump read",
 885                        "crashdump write" };
 886
 887                dev_err_ratelimited(gpu->dev->dev,
 888                        "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
 889                        status & 0xFFFFF, access[(status >> 24) & 0xF],
 890                        (status & (1 << 31)), status);
 891        }
 892}
 893
 894static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
 895{
 896        if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
 897                u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
 898
 899                dev_err_ratelimited(gpu->dev->dev,
 900                        "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
 901                        val & (1 << 28) ? "WRITE" : "READ",
 902                        (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
 903                        (val >> 24) & 0xF);
 904
 905                /* Clear the error */
 906                gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
 907
 908                /* Clear the interrupt */
 909                gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 910                        A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 911        }
 912
 913        if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
 914                dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
 915
 916        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
 917                dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
 918                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
 919
 920        if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
 921                dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
 922                        gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
 923
 924        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
 925                dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
 926                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
 927
 928        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
 929                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
 930
 931        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
 932                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
 933}
 934
 935static void a5xx_uche_err_irq(struct msm_gpu *gpu)
 936{
 937        uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
 938
 939        addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
 940
 941        dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
 942                addr);
 943}
 944
 945static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
 946{
 947        dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
 948}
 949
 950static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 951{
 952        struct drm_device *dev = gpu->dev;
 953        struct msm_drm_private *priv = dev->dev_private;
 954        struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 955
 956        DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
 957                ring ? ring->id : -1, ring ? ring->seqno : 0,
 958                gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 959                gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 960                gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
 961                gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
 962                gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
 963                gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
 964                gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
 965
 966        /* Turn off the hangcheck timer to keep it from bothering us */
 967        del_timer(&gpu->hangcheck_timer);
 968
 969        queue_work(priv->wq, &gpu->recover_work);
 970}
 971
 972#define RBBM_ERROR_MASK \
 973        (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 974        A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 975        A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 976        A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 977        A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 978        A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
 979
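/*
 * Top-level RBBM interrupt handler. Every source except RBBM_AHB_ERROR is
 * acked up front (the AHB error is cleared in a5xx_rbbm_err_irq() once its
 * source has been cleared, to avoid an interrupt storm); the status word
 * is then dispatched to the per-block handlers above.
 */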
 980static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
 981{
 982        u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
 983
 984        /*
 985         * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
 986         * before the source is cleared the interrupt will storm.
 987         */
 988        gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 989                status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 990
 991        /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
 992        if (status & RBBM_ERROR_MASK)
 993                a5xx_rbbm_err_irq(gpu, status);
 994
 995        if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
 996                a5xx_cp_err_irq(gpu);
 997
 998        if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
 999                a5xx_fault_detect_irq(gpu);
1000
1001        if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1002                a5xx_uche_err_irq(gpu);
1003
1004        if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1005                a5xx_gpmu_err_irq(gpu);
1006
1007        if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1008                a5xx_preempt_trigger(gpu);
1009                msm_gpu_retire(gpu);
1010        }
1011
1012        if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1013                a5xx_preempt_irq(gpu);
1014
1015        return IRQ_HANDLED;
1016}
1017
1018static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1019        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1020        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1021        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1022        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1023                REG_A5XX_CP_RB_RPTR_ADDR_HI),
1024        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1025        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1026        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1027};
1028
1029static const u32 a5xx_registers[] = {
1030        0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1031        0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1032        0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1033        0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1034        0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1035        0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1036        0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1037        0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1038        0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1039        0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1040        0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1041        0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1042        0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1043        0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1044        0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1045        0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1046        0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1047        0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1048        0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1049        0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1050        0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1051        0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1052        0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1053        0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1054        0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1055        0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1056        0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1057        0xAC60, 0xAC60, ~0,
1058};
1059
1060static void a5xx_dump(struct msm_gpu *gpu)
1061{
1062        DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1063                gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1064        adreno_dump(gpu);
1065}
1066
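/*
 * Power-up sequence: turn on core power, then the RBCCU and SP power
 * domains through the GPMU, polling the respective PWR_CLK_STATUS bits to
 * confirm each GDSC actually came up.
 */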
1067static int a5xx_pm_resume(struct msm_gpu *gpu)
1068{
1069        int ret;
1070
1071        /* Turn on the core power */
1072        ret = msm_gpu_pm_resume(gpu);
1073        if (ret)
1074                return ret;
1075
 1076        /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1077        gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1078
1079        /* Wait 3 usecs before polling */
1080        udelay(3);
1081
1082        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1083                (1 << 20), (1 << 20));
1084        if (ret) {
1085                DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1086                        gpu->name,
1087                        gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1088                return ret;
1089        }
1090
1091        /* Turn on the SP domain */
1092        gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1093        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1094                (1 << 20), (1 << 20));
1095        if (ret)
1096                DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1097                        gpu->name);
1098
1099        return ret;
1100}
1101
1102static int a5xx_pm_suspend(struct msm_gpu *gpu)
1103{
1104        /* Clear the VBIF pipe before shutting down */
1105        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1106        spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1107
1108        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1109
1110        /*
 1111         * Reset the VBIF before power collapse to avoid issues with FIFO
1112         * entries
1113         */
1114        gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1115        gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1116
1117        return msm_gpu_pm_suspend(gpu);
1118}
1119
1120static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1121{
1122        *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1123                REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1124
1125        return 0;
1126}
1127
1128struct a5xx_crashdumper {
1129        void *ptr;
1130        struct drm_gem_object *bo;
1131        u64 iova;
1132};
1133
1134struct a5xx_gpu_state {
1135        struct msm_gpu_state base;
1136        u32 *hlsqregs;
1137};
1138
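/*
 * Allocate a 1MB buffer for the CP crash dumper: the dump "script" is
 * written at the start of the buffer and the captured register data is
 * written back at whatever offsets the script specifies.
 */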
1139static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1140                struct a5xx_crashdumper *dumper)
1141{
1142        dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1143                SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1144                &dumper->bo, &dumper->iova);
1145
1146        if (!IS_ERR(dumper->ptr))
1147                msm_gem_object_set_name(dumper->bo, "crashdump");
1148
1149        return PTR_ERR_OR_ZERO(dumper->ptr);
1150}
1151
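/*
 * Point the CP at the crashdump script and poll (for up to roughly 10ms)
 * for the bit that signals the dump has completed.
 */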
1152static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1153                struct a5xx_crashdumper *dumper)
1154{
1155        u32 val;
1156
1157        if (IS_ERR_OR_NULL(dumper->ptr))
1158                return -EINVAL;
1159
1160        gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1161                REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1162
1163        gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1164
1165        return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1166                val & 0x04, 100, 10000);
1167}
1168
1169/*
 1170 * This is a list of the registers that need to be read through the HLSQ
 1171 * aperture by the crashdumper.  They are not normally accessible from
 1172 * the CPU on a secure platform.
1173 */
1174static const struct {
1175        u32 type;
1176        u32 regoffset;
1177        u32 count;
1178} a5xx_hlsq_aperture_regs[] = {
 1179        { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1180        { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1181        { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1182        { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1183        { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1184        { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1185        { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1186        { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1187        { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1188        { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1189        { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1190        { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1191        { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1192        { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1193        { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1194};
1195
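/*
 * Capture the HLSQ/SP/TP aperture registers listed above with the help of
 * the crash dumper. The script is built as pairs of 64-bit words: a value
 * (or destination address) followed by a control word that appears to
 * encode the target register in the upper bits plus a write flag and a
 * dword count; two zero words terminate the script.
 */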
1196static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1197                struct a5xx_gpu_state *a5xx_state)
1198{
1199        struct a5xx_crashdumper dumper = { 0 };
1200        u32 offset, count = 0;
1201        u64 *ptr;
1202        int i;
1203
1204        if (a5xx_crashdumper_init(gpu, &dumper))
1205                return;
1206
1207        /* The script will be written at offset 0 */
1208        ptr = dumper.ptr;
1209
1210        /* Start writing the data at offset 256k */
1211        offset = dumper.iova + (256 * SZ_1K);
1212
1213        /* Count how many additional registers to get from the HLSQ aperture */
1214        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1215                count += a5xx_hlsq_aperture_regs[i].count;
1216
1217        a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1218        if (!a5xx_state->hlsqregs)
1219                return;
1220
1221        /* Build the crashdump script */
1222        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1223                u32 type = a5xx_hlsq_aperture_regs[i].type;
1224                u32 c = a5xx_hlsq_aperture_regs[i].count;
1225
1226                /* Write the register to select the desired bank */
1227                *ptr++ = ((u64) type << 8);
1228                *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1229                        (1 << 21) | 1;
1230
1231                *ptr++ = offset;
1232                *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1233                        | c;
1234
1235                offset += c * sizeof(u32);
1236        }
1237
1238        /* Write two zeros to close off the script */
1239        *ptr++ = 0;
1240        *ptr++ = 0;
1241
1242        if (a5xx_crashdumper_run(gpu, &dumper)) {
1243                kfree(a5xx_state->hlsqregs);
1244                msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1245                return;
1246        }
1247
1248        /* Copy the data from the crashdumper to the state */
1249        memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1250                count * sizeof(u32));
1251
1252        msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1253}
1254
1255static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1256{
1257        struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1258                        GFP_KERNEL);
1259
1260        if (!a5xx_state)
1261                return ERR_PTR(-ENOMEM);
1262
1263        /* Temporarily disable hardware clock gating before reading the hw */
1264        a5xx_set_hwcg(gpu, false);
1265
1266        /* First get the generic state from the adreno core */
1267        adreno_gpu_state_get(gpu, &(a5xx_state->base));
1268
1269        a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1270
1271        /* Get the HLSQ regs with the help of the crashdumper */
1272        a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1273
1274        a5xx_set_hwcg(gpu, true);
1275
1276        return &a5xx_state->base;
1277}
1278
1279static void a5xx_gpu_state_destroy(struct kref *kref)
1280{
1281        struct msm_gpu_state *state = container_of(kref,
1282                struct msm_gpu_state, ref);
1283        struct a5xx_gpu_state *a5xx_state = container_of(state,
1284                struct a5xx_gpu_state, base);
1285
1286        kfree(a5xx_state->hlsqregs);
1287
1288        adreno_gpu_state_destroy(state);
1289        kfree(a5xx_state);
1290}
1291
1292int a5xx_gpu_state_put(struct msm_gpu_state *state)
1293{
1294        if (IS_ERR_OR_NULL(state))
1295                return 1;
1296
1297        return kref_put(&state->ref, a5xx_gpu_state_destroy);
1298}
1299
1300
1301#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1302void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1303                struct drm_printer *p)
1304{
1305        int i, j;
1306        u32 pos = 0;
1307        struct a5xx_gpu_state *a5xx_state = container_of(state,
1308                struct a5xx_gpu_state, base);
1309
1310        if (IS_ERR_OR_NULL(state))
1311                return;
1312
1313        adreno_show(gpu, state, p);
1314
1315        /* Dump the additional a5xx HLSQ registers */
1316        if (!a5xx_state->hlsqregs)
1317                return;
1318
1319        drm_printf(p, "registers-hlsq:\n");
1320
1321        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1322                u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1323                u32 c = a5xx_hlsq_aperture_regs[i].count;
1324
1325                for (j = 0; j < c; j++, pos++, o++) {
1326                        /*
1327                         * To keep the crashdump simple we pull the entire range
1328                         * for each register type but not all of the registers
1329                         * in the range are valid. Fortunately invalid registers
1330                         * stick out like a sore thumb with a value of
1331                         * 0xdeadbeef
1332                         */
1333                        if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1334                                continue;
1335
1336                        drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1337                                o << 2, a5xx_state->hlsqregs[pos]);
1338                }
1339        }
1340}
1341#endif
1342
1343static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1344{
1345        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1346        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1347
1348        return a5xx_gpu->cur_ring;
1349}
1350
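/*
 * devfreq busy accounting: convert the delta of RBBM perf counter 0
 * (programmed in a5xx_hw_init() to count busy status) into microseconds of
 * busy time, assuming the counter ticks at the core clock rate.
 */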
1351static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1352{
1353        u64 busy_cycles, busy_time;
1354
1355        busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1356                        REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1357
1358        busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1359        do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1360
1361        gpu->devfreq.busy_cycles = busy_cycles;
1362
1363        if (WARN_ON(busy_time > ~0LU))
1364                return ~0LU;
1365
1366        return (unsigned long)busy_time;
1367}
1368
1369static const struct adreno_gpu_funcs funcs = {
1370        .base = {
1371                .get_param = adreno_get_param,
1372                .hw_init = a5xx_hw_init,
1373                .pm_suspend = a5xx_pm_suspend,
1374                .pm_resume = a5xx_pm_resume,
1375                .recover = a5xx_recover,
1376                .submit = a5xx_submit,
1377                .flush = a5xx_flush,
1378                .active_ring = a5xx_active_ring,
1379                .irq = a5xx_irq,
1380                .destroy = a5xx_destroy,
1381#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1382                .show = a5xx_show,
1383#endif
1384#if defined(CONFIG_DEBUG_FS)
1385                .debugfs_init = a5xx_debugfs_init,
1386#endif
1387                .gpu_busy = a5xx_gpu_busy,
1388                .gpu_state_get = a5xx_gpu_state_get,
1389                .gpu_state_put = a5xx_gpu_state_put,
1390        },
1391        .get_timestamp = a5xx_get_timestamp,
1392};
1393
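/*
 * Read the fused speed bin from nvmem (when the DT provides a "speed_bin"
 * cell) and tell the OPP layer which table entries this part supports.
 */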
1394static void check_speed_bin(struct device *dev)
1395{
1396        struct nvmem_cell *cell;
1397        u32 bin, val;
1398
1399        cell = nvmem_cell_get(dev, "speed_bin");
1400
 1401        /* If an nvmem cell isn't defined, there is nothing to do */
1402        if (IS_ERR(cell))
1403                return;
1404
1405        bin = *((u32 *) nvmem_cell_read(cell, NULL));
1406        nvmem_cell_put(cell);
1407
1408        val = (1 << bin);
1409
1410        dev_pm_opp_set_supported_hw(dev, &val, 1);
1411}
1412
1413struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1414{
1415        struct msm_drm_private *priv = dev->dev_private;
1416        struct platform_device *pdev = priv->gpu_pdev;
1417        struct a5xx_gpu *a5xx_gpu = NULL;
1418        struct adreno_gpu *adreno_gpu;
1419        struct msm_gpu *gpu;
1420        int ret;
1421
1422        if (!pdev) {
1423                DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1424                return ERR_PTR(-ENXIO);
1425        }
1426
1427        a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1428        if (!a5xx_gpu)
1429                return ERR_PTR(-ENOMEM);
1430
1431        adreno_gpu = &a5xx_gpu->base;
1432        gpu = &adreno_gpu->base;
1433
1434        adreno_gpu->registers = a5xx_registers;
1435        adreno_gpu->reg_offsets = a5xx_register_offsets;
1436
1437        a5xx_gpu->lm_leakage = 0x4E001A;
1438
1439        check_speed_bin(&pdev->dev);
1440
1441        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1442        if (ret) {
1443                a5xx_destroy(&(a5xx_gpu->base.base));
1444                return ERR_PTR(ret);
1445        }
1446
1447        if (gpu->aspace)
1448                msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1449
1450        /* Set up the preemption specific bits and pieces for each ringbuffer */
1451        a5xx_preempt_init(gpu);
1452
1453        return gpu;
1454}
1455