linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
   3 */
   4
   5#include <linux/kernel.h>
   6#include <linux/types.h>
   7#include <linux/cpumask.h>
   8#include <linux/qcom_scm.h>
   9#include <linux/pm_opp.h>
  10#include <linux/nvmem-consumer.h>
  11#include <linux/slab.h>
  12#include "msm_gem.h"
  13#include "msm_mmu.h"
  14#include "a5xx_gpu.h"
  15
  16extern bool hang_debug;
  17static void a5xx_dump(struct msm_gpu *gpu);
  18
  19#define GPU_PAS_ID 13
  20
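/*
 * Ask the CP to write its current read pointer into the per-ring shadow
 * buffer.  This is only possible when the microcode supports the
 * CP_WHERE_AM_I packet (see a5xx_ucode_check_version()).
 */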
  21static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  22{
  23        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  24        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  25
  26        if (a5xx_gpu->has_whereami) {
  27                OUT_PKT7(ring, CP_WHERE_AM_I, 2);
  28                OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
  29                OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
  30        }
  31}
  32
  33void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
  34                bool sync)
  35{
  36        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  37        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  38        uint32_t wptr;
  39        unsigned long flags;
  40
  41        /*
  42         * Most flush operations need to issue a WHERE_AM_I opcode to sync up
  43         * the rptr shadow
  44         */
  45        if (sync)
  46                update_shadow_rptr(gpu, ring);
  47
  48        spin_lock_irqsave(&ring->preempt_lock, flags);
  49
  50        /* Copy the shadow to the actual register */
  51        ring->cur = ring->next;
  52
  53        /* Make sure to wrap wptr if we need to */
  54        wptr = get_wptr(ring);
  55
  56        spin_unlock_irqrestore(&ring->preempt_lock, flags);
  57
  58        /* Make sure everything is posted before making a decision */
  59        mb();
  60
  61        /* Update HW if this is the current ring and we are not in preempt */
  62        if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
  63                gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
  64}
  65
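/*
 * Used for "sudo" submits (CONFIG_DRM_MSM_GPU_SUDO): the submit's commands
 * are copied directly into the ringbuffer instead of being executed as
 * indirect buffers.
 */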
  66static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
  67{
  68        struct msm_drm_private *priv = gpu->dev->dev_private;
  69        struct msm_ringbuffer *ring = submit->ring;
  70        struct msm_gem_object *obj;
  71        uint32_t *ptr, dwords;
  72        unsigned int i;
  73
  74        for (i = 0; i < submit->nr_cmds; i++) {
  75                switch (submit->cmd[i].type) {
  76                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
  77                        break;
  78                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
  79                        if (priv->lastctx == submit->queue->ctx)
  80                                break;
  81                        fallthrough;
  82                case MSM_SUBMIT_CMD_BUF:
  83                        /* copy commands into RB: */
  84                        obj = submit->bos[submit->cmd[i].idx].obj;
  85                        dwords = submit->cmd[i].size;
  86
  87                        ptr = msm_gem_get_vaddr(&obj->base);
  88
  89                        /* _get_vaddr() shouldn't fail at this point,
  90                         * since we've already mapped it once in
  91                         * submit_reloc()
  92                         */
  93                        if (WARN_ON(!ptr))
  94                                return;
  95
  96                        for (i = 0; i < dwords; i++) {
  97                                /* normally the OUT_PKTn() would wait
  98                                 * for space for the packet.  But since
  99                                 * we just OUT_RING() the whole thing,
 100                                 * need to call adreno_wait_ring()
 101                                 * ourself:
 102                                 */
 103                                adreno_wait_ring(ring, 1);
 104                                OUT_RING(ring, ptr[i]);
 105                        }
 106
 107                        msm_gem_put_vaddr(&obj->base);
 108
 109                        break;
 110                }
 111        }
 112
 113        a5xx_flush(gpu, ring, true);
 114        a5xx_preempt_trigger(gpu);
 115
 116        /* we might not necessarily have a cmd from userspace to
 117         * trigger an event to know that submit has completed, so
 118         * do this manually:
 119         */
 120        a5xx_idle(gpu, ring);
 121        ring->memptrs->fence = submit->seqno;
 122        msm_gpu_retire(gpu);
 123}
 124
 125static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 126{
 127        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 128        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 129        struct msm_drm_private *priv = gpu->dev->dev_private;
 130        struct msm_ringbuffer *ring = submit->ring;
 131        unsigned int i, ibs = 0;
 132
 133        if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
 134                priv->lastctx = NULL;
 135                a5xx_submit_in_rb(gpu, submit);
 136                return;
 137        }
 138
 139        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 140        OUT_RING(ring, 0x02);
 141
 142        /* Turn off protected mode to write to special registers */
 143        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 144        OUT_RING(ring, 0);
 145
 146        /* Set the save preemption record for the ring/command */
 147        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 148        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 149        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 150
 151        /* Turn back on protected mode */
 152        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 153        OUT_RING(ring, 1);
 154
 155        /* Enable local preemption for finegrain preemption */
  156        OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 157        OUT_RING(ring, 0x02);
 158
 159        /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 160        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 161        OUT_RING(ring, 0x02);
 162
 163        /* Submit the commands */
 164        for (i = 0; i < submit->nr_cmds; i++) {
 165                switch (submit->cmd[i].type) {
 166                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 167                        break;
 168                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 169                        if (priv->lastctx == submit->queue->ctx)
 170                                break;
 171                        fallthrough;
 172                case MSM_SUBMIT_CMD_BUF:
 173                        OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 174                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 175                        OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 176                        OUT_RING(ring, submit->cmd[i].size);
 177                        ibs++;
 178                        break;
 179                }
 180
 181                /*
  182         * Periodically update the shadow rptr if needed, so that we
 183                 * can see partial progress of submits with large # of
 184                 * cmds.. otherwise we could needlessly stall waiting for
 185                 * ringbuffer state, simply due to looking at a shadow
 186                 * rptr value that has not been updated
 187                 */
 188                if ((ibs % 32) == 0)
 189                        update_shadow_rptr(gpu, ring);
 190        }
 191
 192        /*
 193         * Write the render mode to NULL (0) to indicate to the CP that the IBs
 194         * are done rendering - otherwise a lucky preemption would start
 195         * replaying from the last checkpoint
 196         */
 197        OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
 198        OUT_RING(ring, 0);
 199        OUT_RING(ring, 0);
 200        OUT_RING(ring, 0);
 201        OUT_RING(ring, 0);
 202        OUT_RING(ring, 0);
 203
 204        /* Turn off IB level preemptions */
 205        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 206        OUT_RING(ring, 0x01);
 207
 208        /* Write the fence to the scratch register */
 209        OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
 210        OUT_RING(ring, submit->seqno);
 211
 212        /*
 213         * Execute a CACHE_FLUSH_TS event. This will ensure that the
 214         * timestamp is written to the memory and then triggers the interrupt
 215         */
 216        OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 217        OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
 218                CP_EVENT_WRITE_0_IRQ);
 219        OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 220        OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 221        OUT_RING(ring, submit->seqno);
 222
 223        /* Yield the floor on command completion */
 224        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 225        /*
 226         * If dword[2:1] are non zero, they specify an address for the CP to
 227         * write the value of dword[3] to on preemption complete. Write 0 to
 228         * skip the write
 229         */
 230        OUT_RING(ring, 0x00);
 231        OUT_RING(ring, 0x00);
 232        /* Data value - not used if the address above is 0 */
 233        OUT_RING(ring, 0x01);
 234        /* Set bit 0 to trigger an interrupt on preempt complete */
 235        OUT_RING(ring, 0x01);
 236
 237        /* A WHERE_AM_I packet is not needed after a YIELD */
 238        a5xx_flush(gpu, ring, false);
 239
 240        /* Check to see if we need to start preemption */
 241        a5xx_preempt_trigger(gpu);
 242}
 243
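/*
 * Per-variant hardware clock gating (HWCG) register settings, written by
 * a5xx_set_hwcg() below (or zeroed when clock gating is disabled).
 */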
 244static const struct adreno_five_hwcg_regs {
 245        u32 offset;
 246        u32 value;
 247} a5xx_hwcg[] = {
 248        {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 249        {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 250        {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
 251        {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
 252        {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 253        {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
 254        {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
 255        {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
 256        {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 257        {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
 258        {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
 259        {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
 260        {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 261        {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 262        {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 263        {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 264        {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 265        {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
 266        {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
 267        {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
 268        {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 269        {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 270        {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 271        {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 272        {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 273        {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
 274        {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
 275        {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
 276        {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 277        {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 278        {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 279        {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 280        {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 281        {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 282        {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 283        {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 284        {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 285        {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
 286        {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
 287        {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
 288        {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 289        {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 290        {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 291        {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 292        {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 293        {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 294        {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 295        {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 296        {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 297        {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
 298        {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
 299        {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
 300        {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 301        {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 302        {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 303        {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 304        {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
 305        {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 306        {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 307        {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 308        {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 309        {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 310        {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 311        {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
 312        {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
 313        {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
 314        {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 315        {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
 316        {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
 317        {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
 318        {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 319        {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 320        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 321        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
 322        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
 323        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
 324        {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 325        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 326        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
 327        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
 328        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
 329        {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 330        {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 331        {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 332        {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 333        {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 334        {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 335        {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 336        {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 337        {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 338        {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 339        {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 340}, a50x_hwcg[] = {
 341        {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 342        {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 343        {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 344        {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 345        {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 346        {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 347        {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 348        {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 349        {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 350        {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 351        {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 352        {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 353        {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 354        {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 355        {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 356        {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 357        {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 358        {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
 359        {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 360        {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 361        {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 362        {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 363        {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 364        {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 365        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 366        {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 367        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 368        {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 369        {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 370        {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 371        {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 372        {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 373        {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 374        {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 375        {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 376        {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 377        {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 378        {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 379}, a512_hwcg[] = {
 380        {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 381        {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 382        {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 383        {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
 384        {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 385        {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
 386        {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 387        {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 388        {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 389        {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
 390        {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 391        {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 392        {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 393        {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
 394        {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 395        {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 396        {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 397        {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 398        {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 399        {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
 400        {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 401        {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 402        {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 403        {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 404        {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 405        {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
 406        {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 407        {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 408        {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 409        {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 410        {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
 411        {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 412        {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 413        {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 414        {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 415        {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
 416        {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 417        {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
 418        {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 419        {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 420        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 421        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
 422        {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 423        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 424        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
 425        {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 426        {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 427        {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 428        {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 429        {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 430        {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 431        {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 432        {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 433        {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 434        {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 435        {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 436};
 437
 438void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 439{
 440        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 441        const struct adreno_five_hwcg_regs *regs;
 442        unsigned int i, sz;
 443
 444        if (adreno_is_a508(adreno_gpu)) {
 445                regs = a50x_hwcg;
 446                sz = ARRAY_SIZE(a50x_hwcg);
 447        } else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
 448                regs = a512_hwcg;
 449                sz = ARRAY_SIZE(a512_hwcg);
 450        } else {
 451                regs = a5xx_hwcg;
 452                sz = ARRAY_SIZE(a5xx_hwcg);
 453        }
 454
 455        for (i = 0; i < sz; i++)
 456                gpu_write(gpu, regs[i].offset,
 457                          state ? regs[i].value : 0);
 458
 459        if (adreno_is_a540(adreno_gpu)) {
 460                gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
 461                gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
 462        }
 463
 464        gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 465        gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 466}
 467
 468static int a5xx_me_init(struct msm_gpu *gpu)
 469{
 470        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 471        struct msm_ringbuffer *ring = gpu->rb[0];
 472
 473        OUT_PKT7(ring, CP_ME_INIT, 8);
 474
 475        OUT_RING(ring, 0x0000002F);
 476
 477        /* Enable multiple hardware contexts */
 478        OUT_RING(ring, 0x00000003);
 479
 480        /* Enable error detection */
 481        OUT_RING(ring, 0x20000000);
 482
 483        /* Don't enable header dump */
 484        OUT_RING(ring, 0x00000000);
 485        OUT_RING(ring, 0x00000000);
 486
 487        /* Specify workarounds for various microcode issues */
 488        if (adreno_is_a530(adreno_gpu)) {
 489                /* Workaround for token end syncs
 490                 * Force a WFI after every direct-render 3D mode draw and every
 491                 * 2D mode 3 draw
 492                 */
 493                OUT_RING(ring, 0x0000000B);
 494        } else if (adreno_is_a510(adreno_gpu)) {
 495                /* Workaround for token and syncs */
 496                OUT_RING(ring, 0x00000001);
 497        } else {
 498                /* No workarounds enabled */
 499                OUT_RING(ring, 0x00000000);
 500        }
 501
 502        OUT_RING(ring, 0x00000000);
 503        OUT_RING(ring, 0x00000000);
 504
 505        a5xx_flush(gpu, ring, true);
 506        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 507}
 508
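/*
 * Prime the CP preemption state on ring 0 (save record address, preempt
 * enables and an initial yield) before any real submissions are made.
 */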
 509static int a5xx_preempt_start(struct msm_gpu *gpu)
 510{
 511        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 512        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 513        struct msm_ringbuffer *ring = gpu->rb[0];
 514
 515        if (gpu->nr_rings == 1)
 516                return 0;
 517
 518        /* Turn off protected mode to write to special registers */
 519        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 520        OUT_RING(ring, 0);
 521
 522        /* Set the save preemption record for the ring/command */
 523        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 524        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 525        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 526
 527        /* Turn back on protected mode */
 528        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 529        OUT_RING(ring, 1);
 530
 531        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 532        OUT_RING(ring, 0x00);
 533
 534        OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 535        OUT_RING(ring, 0x01);
 536
 537        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 538        OUT_RING(ring, 0x01);
 539
 540        /* Yield the floor on command completion */
 541        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 542        OUT_RING(ring, 0x00);
 543        OUT_RING(ring, 0x00);
 544        OUT_RING(ring, 0x01);
 545        OUT_RING(ring, 0x01);
 546
  547        /* The WHERE_AM_I packet is not needed after a YIELD is issued */
 548        a5xx_flush(gpu, ring, false);
 549
 550        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 551}
 552
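/*
 * Check whether the PFP microcode is new enough to support the CP_WHERE_AM_I
 * packet, which is needed for the rptr shadow and preemption.
 */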
 553static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
 554                struct drm_gem_object *obj)
 555{
 556        u32 *buf = msm_gem_get_vaddr(obj);
 557
 558        if (IS_ERR(buf))
 559                return;
 560
 561        /*
 562         * If the lowest nibble is 0xa that is an indication that this microcode
 563         * has been patched. The actual version is in dword [3] but we only care
 564         * about the patchlevel which is the lowest nibble of dword [3]
 565         */
 566        if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
 567                a5xx_gpu->has_whereami = true;
 568
 569        msm_gem_put_vaddr(obj);
 570}
 571
 572static int a5xx_ucode_init(struct msm_gpu *gpu)
 573{
 574        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 575        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 576        int ret;
 577
 578        if (!a5xx_gpu->pm4_bo) {
 579                a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
 580                        adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
 581
 582
 583                if (IS_ERR(a5xx_gpu->pm4_bo)) {
 584                        ret = PTR_ERR(a5xx_gpu->pm4_bo);
 585                        a5xx_gpu->pm4_bo = NULL;
 586                        DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
 587                                ret);
 588                        return ret;
 589                }
 590
 591                msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
 592        }
 593
 594        if (!a5xx_gpu->pfp_bo) {
 595                a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
 596                        adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
 597
 598                if (IS_ERR(a5xx_gpu->pfp_bo)) {
 599                        ret = PTR_ERR(a5xx_gpu->pfp_bo);
 600                        a5xx_gpu->pfp_bo = NULL;
 601                        DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
 602                                ret);
 603                        return ret;
 604                }
 605
 606                msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
 607                a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
 608        }
 609
 610        gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
 611                REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
 612
 613        gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
 614                REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
 615
 616        return 0;
 617}
 618
 619#define SCM_GPU_ZAP_SHADER_RESUME 0
 620
 621static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 622{
 623        int ret;
 624
 625        ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
 626        if (ret)
 627                DRM_ERROR("%s: zap-shader resume failed: %d\n",
 628                        gpu->name, ret);
 629
 630        return ret;
 631}
 632
 633static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 634{
 635        static bool loaded;
 636        int ret;
 637
 638        /*
 639         * If the zap shader is already loaded into memory we just need to kick
 640         * the remote processor to reinitialize it
 641         */
 642        if (loaded)
 643                return a5xx_zap_shader_resume(gpu);
 644
 645        ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
 646
 647        loaded = !ret;
 648        return ret;
 649}
 650
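/* RBBM interrupt sources unmasked in a5xx_hw_init() and handled by a5xx_irq() */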
 651#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 652          A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 653          A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 654          A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 655          A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 656          A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
 657          A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 658          A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
 659          A5XX_RBBM_INT_0_MASK_CP_SW | \
 660          A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 661          A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 662          A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
 663
 664static int a5xx_hw_init(struct msm_gpu *gpu)
 665{
 666        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 667        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 668        u32 regbit;
 669        int ret;
 670
 671        gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 672
 673        if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
 674            adreno_is_a540(adreno_gpu))
 675                gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
 676
 677        /* Make all blocks contribute to the GPU BUSY perf counter */
 678        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
 679
 680        /* Enable RBBM error reporting bits */
 681        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
 682
 683        if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
 684                /*
 685                 * Mask out the activity signals from RB1-3 to avoid false
 686                 * positives
 687                 */
 688
 689                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
 690                        0xF0000000);
 691                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
 692                        0xFFFFFFFF);
 693                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
 694                        0xFFFFFFFF);
 695                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
 696                        0xFFFFFFFF);
 697                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
 698                        0xFFFFFFFF);
 699                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
 700                        0xFFFFFFFF);
 701                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
 702                        0xFFFFFFFF);
 703                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
 704                        0xFFFFFFFF);
 705        }
 706
 707        /* Enable fault detection */
 708        gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
 709                (1 << 30) | 0xFFFF);
 710
 711        /* Turn on performance counters */
 712        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
 713
 714        /* Select CP0 to always count cycles */
 715        gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
 716
 717        /* Select RBBM0 to countable 6 to get the busy status for devfreq */
 718        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
 719
 720        /* Increase VFD cache access so LRZ and other data gets evicted less */
 721        gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 722
 723        /* Disable L2 bypass in the UCHE */
 724        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
 725        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
 726        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
 727        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
 728
 729        /* Set the GMEM VA range (0 to gpu->gmem) */
 730        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 731        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
 732        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
 733                0x00100000 + adreno_gpu->gmem - 1);
 734        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 735
 736        if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu)) {
 737                gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
 738                if (adreno_is_a508(adreno_gpu))
 739                        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
 740                else
 741                        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
 742                gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
 743                gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
 744        } else {
 745                gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
 746                if (adreno_is_a530(adreno_gpu))
 747                        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
 748                else
 749                        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
 750                gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
 751                gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
 752        }
 753
 754        if (adreno_is_a508(adreno_gpu))
 755                gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
 756                          (0x100 << 11 | 0x100 << 22));
 757        else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
 758                 adreno_is_a512(adreno_gpu))
 759                gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
 760                          (0x200 << 11 | 0x200 << 22));
 761        else
 762                gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
 763                          (0x400 << 11 | 0x300 << 22));
 764
 765        if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 766                gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 767
 768        /*
 769         * Disable the RB sampler datapath DP2 clock gating optimization
 770         * for 1-SP GPUs, as it is enabled by default.
 771         */
 772        if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
 773            adreno_is_a512(adreno_gpu))
 774                gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
 775
 776        /* Disable UCHE global filter as SP can invalidate/flush independently */
 777        gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
 778
 779        /* Enable USE_RETENTION_FLOPS */
 780        gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
 781
 782        /* Enable ME/PFP split notification */
 783        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 784
  785        /*
  786         * On A5xx, the CCU can send the context_done event for a particular
  787         * context to UCHE (and ultimately to the CP) while a valid transaction
  788         * for that context is still pending inside the CCU. The CP may then
  789         * program config registers, causing that pending transaction to be
  790         * interpreted differently, which can lead to a GPU fault. This bug is
  791         * fixed in the latest A510 revision. To enable the fix, bit[11] of
  792         * RB_DBG_ECO_CNTL needs to be cleared (the default is 1, i.e. the fix
  793         * is disabled). On older A510 revisions this bit is unused.
  794         */
 795        if (adreno_is_a510(adreno_gpu))
 796                gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
 797
 798        /* Enable HWCG */
 799        a5xx_set_hwcg(gpu, true);
 800
 801        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 802
 803        /* Set the highest bank bit */
 804        if (adreno_is_a540(adreno_gpu))
 805                regbit = 2;
 806        else
 807                regbit = 1;
 808
 809        gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
 810        gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
 811
 812        if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
 813            adreno_is_a540(adreno_gpu))
 814                gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
 815
  816        /* Disable all flat shading optimization (ALLFLATOPTDIS) */
 817        gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
 818
 819        /* Protect registers from the CP */
 820        gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
 821
 822        /* RBBM */
 823        gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
 824        gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
 825        gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
 826        gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
 827        gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
 828        gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
 829
 830        /* Content protect */
 831        gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
 832                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 833                        16));
 834        gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
 835                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
 836
 837        /* CP */
 838        gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
 839        gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
 840        gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
 841        gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
 842
 843        /* RB */
 844        gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
 845        gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
 846
 847        /* VPC */
 848        gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
 849        gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
 850
 851        /* UCHE */
 852        gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 853
 854        if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
 855            adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
 856            adreno_is_a530(adreno_gpu))
 857                gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 858                        ADRENO_PROTECT_RW(0x10000, 0x8000));
 859
 860        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
 861        /*
  862         * Disable the trusted memory range - we don't actually support secure
 863         * memory rendering at this point in time and we don't want to block off
 864         * part of the virtual memory space.
 865         */
 866        gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 867                REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 868        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 869
 870        /* Put the GPU into 64 bit by default */
 871        gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
 872        gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
 873        gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
 874        gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
 875        gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
 876        gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
 877        gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
 878        gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
 879        gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
 880        gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
 881        gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
 882        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
 883
  884        /*
  885         * A VPC corner case with local memory load kill can lead to corrupt
  886         * internal state. The normal disable does not work for all a5x chips,
  887         * so apply the following settings to disable it.
  888         */
 889        if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
 890                gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
 891                gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
 892        }
 893
 894        ret = adreno_hw_init(gpu);
 895        if (ret)
 896                return ret;
 897
 898        if (!(adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
 899              adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu)))
 900                a5xx_gpmu_ucode_init(gpu);
 901
 902        ret = a5xx_ucode_init(gpu);
 903        if (ret)
 904                return ret;
 905
 906        /* Set the ringbuffer address */
 907        gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
 908                gpu->rb[0]->iova);
 909
 910        /*
 911         * If the microcode supports the WHERE_AM_I opcode then we can use that
 912         * in lieu of the RPTR shadow and enable preemption. Otherwise, we
 913         * can't safely use the RPTR shadow or preemption. In either case, the
 914         * RPTR shadow should be disabled in hardware.
 915         */
 916        gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
 917                MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
 918
 919        /* Create a privileged buffer for the RPTR shadow */
 920        if (a5xx_gpu->has_whereami) {
 921                if (!a5xx_gpu->shadow_bo) {
 922                        a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
 923                                sizeof(u32) * gpu->nr_rings,
 924                                MSM_BO_WC | MSM_BO_MAP_PRIV,
 925                                gpu->aspace, &a5xx_gpu->shadow_bo,
 926                                &a5xx_gpu->shadow_iova);
 927
 928                        if (IS_ERR(a5xx_gpu->shadow))
 929                                return PTR_ERR(a5xx_gpu->shadow);
 930                }
 931
 932                gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
 933                        REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
 934        } else if (gpu->nr_rings > 1) {
 935                /* Disable preemption if WHERE_AM_I isn't available */
 936                a5xx_preempt_fini(gpu);
 937                gpu->nr_rings = 1;
 938        }
 939
 940        a5xx_preempt_hw_init(gpu);
 941
  942        /* Unmask the RBBM interrupts that we handle */
 943        gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
 944
 945        /* Clear ME_HALT to start the micro engine */
 946        gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
 947        ret = a5xx_me_init(gpu);
 948        if (ret)
 949                return ret;
 950
 951        ret = a5xx_power_init(gpu);
 952        if (ret)
 953                return ret;
 954
 955        /*
 956         * Send a pipeline event stat to get misbehaving counters to start
 957         * ticking correctly
 958         */
 959        if (adreno_is_a530(adreno_gpu)) {
 960                OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
 961                OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
 962
 963                a5xx_flush(gpu, gpu->rb[0], true);
 964                if (!a5xx_idle(gpu, gpu->rb[0]))
 965                        return -EINVAL;
 966        }
 967
 968        /*
 969         * If the chip that we are using does support loading one, then
 970         * try to load a zap shader into the secure world. If successful
 971         * we can use the CP to switch out of secure mode. If not then we
  972         * have no recourse but to try to switch ourselves out manually. If we
 973         * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
 974         * be blocked and a permissions violation will soon follow.
 975         */
 976        ret = a5xx_zap_shader_init(gpu);
 977        if (!ret) {
 978                OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 979                OUT_RING(gpu->rb[0], 0x00000000);
 980
 981                a5xx_flush(gpu, gpu->rb[0], true);
 982                if (!a5xx_idle(gpu, gpu->rb[0]))
 983                        return -EINVAL;
 984        } else if (ret == -ENODEV) {
 985                /*
 986                 * This device does not use zap shader (but print a warning
 987                 * just in case someone got their dt wrong.. hopefully they
 988                 * have a debug UART to realize the error of their ways...
 989                 * if you mess this up you are about to crash horribly)
 990                 */
 991                dev_warn_once(gpu->dev->dev,
 992                        "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 993                gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
 994        } else {
 995                return ret;
 996        }
 997
 998        /* Last step - yield the ringbuffer */
 999        a5xx_preempt_start(gpu);
1000
1001        return 0;
1002}
1003
1004static void a5xx_recover(struct msm_gpu *gpu)
1005{
1006        int i;
1007
1008        adreno_dump_info(gpu);
1009
1010        for (i = 0; i < 8; i++) {
1011                printk("CP_SCRATCH_REG%d: %u\n", i,
1012                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
1013        }
1014
1015        if (hang_debug)
1016                a5xx_dump(gpu);
1017
1018        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
1019        gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
1020        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
1021        adreno_recover(gpu);
1022}
1023
1024static void a5xx_destroy(struct msm_gpu *gpu)
1025{
1026        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1027        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1028
1029        DBG("%s", gpu->name);
1030
1031        a5xx_preempt_fini(gpu);
1032
1033        if (a5xx_gpu->pm4_bo) {
1034                msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
1035                drm_gem_object_put(a5xx_gpu->pm4_bo);
1036        }
1037
1038        if (a5xx_gpu->pfp_bo) {
1039                msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
1040                drm_gem_object_put(a5xx_gpu->pfp_bo);
1041        }
1042
1043        if (a5xx_gpu->gpmu_bo) {
1044                msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
1045                drm_gem_object_put(a5xx_gpu->gpmu_bo);
1046        }
1047
1048        if (a5xx_gpu->shadow_bo) {
1049                msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
1050                drm_gem_object_put(a5xx_gpu->shadow_bo);
1051        }
1052
1053        adreno_gpu_cleanup(adreno_gpu);
1054        kfree(a5xx_gpu);
1055}
1056
1057static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
1058{
1059        if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
1060                return false;
1061
1062        /*
1063         * Nearly every abnormality ends up pausing the GPU and triggering a
1064         * fault so we can safely just watch for this one interrupt to fire
1065         */
1066        return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
1067                A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
1068}
1069
1070bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1071{
1072        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1073        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1074
1075        if (ring != a5xx_gpu->cur_ring) {
1076                WARN(1, "Tried to idle a non-current ringbuffer\n");
1077                return false;
1078        }
1079
1080        /* wait for CP to drain ringbuffer: */
1081        if (!adreno_idle(gpu, ring))
1082                return false;
1083
1084        if (spin_until(_a5xx_check_idle(gpu))) {
1085                DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
1086                        gpu->name, __builtin_return_address(0),
1087                        gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1088                        gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
1089                        gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1090                        gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
1091                return false;
1092        }
1093
1094        return true;
1095}
1096
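/*
 * IOMMU pagefault handler: log the faulting IOVA along with CP scratch
 * registers 4-7, which can serve as breadcrumbs for identifying the
 * offending submit.
 */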
1097static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1098{
1099        struct msm_gpu *gpu = arg;
1100        pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
1101                        iova, flags,
1102                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
1103                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
1104                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
1105                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
1106
1107        return 0;
1108}
1109
1110static void a5xx_cp_err_irq(struct msm_gpu *gpu)
1111{
1112        u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
1113
1114        if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
1115                u32 val;
1116
1117                gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
1118
1119                /*
1120                 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
1121                 * read it twice
1122                 */
1123
1124                gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1125                val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1126
1127                dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
1128                        val);
1129        }
1130
1131        if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
1132                dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
1133                        gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
1134
1135        if (status & A5XX_CP_INT_CP_DMA_ERROR)
1136                dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
1137
1138        if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1139                u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
1140
1141                dev_err_ratelimited(gpu->dev->dev,
1142                        "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1143                        val & (1 << 24) ? "WRITE" : "READ",
1144                        (val & 0xFFFFF) >> 2, val);
1145        }
1146
1147        if (status & A5XX_CP_INT_CP_AHB_ERROR) {
1148                u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
1149                const char *access[16] = { "reserved", "reserved",
1150                        "timestamp lo", "timestamp hi", "pfp read", "pfp write",
1151                        "", "", "me read", "me write", "", "", "crashdump read",
1152                        "crashdump write" };
1153
1154                dev_err_ratelimited(gpu->dev->dev,
1155                        "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
1156                        status & 0xFFFFF, access[(status >> 24) & 0xF],
1157                        (status & (1 << 31)), status);
1158        }
1159}
1160
1161static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
1162{
1163        if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1164                u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1165
1166                dev_err_ratelimited(gpu->dev->dev,
1167                        "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1168                        val & (1 << 28) ? "WRITE" : "READ",
1169                        (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1170                        (val >> 24) & 0xF);
1171
1172                /* Clear the error */
1173                gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1174
1175                /* Clear the interrupt */
1176                gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1177                        A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1178        }
1179
1180        if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1181                dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1182
1183        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1184                dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1185                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1186
1187        if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1188                dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1189                        gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1190
1191        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1192                dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1193                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1194
1195        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1196                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1197
1198        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1199                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1200}
1201
1202static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1203{
1204        uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1205
1206        addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1207
1208        dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1209                addr);
1210}
1211
1212static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1213{
1214        dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1215}
1216
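/*
 * Hang-detect (MISC_HANG_DETECT) handler: dump the CP state and schedule
 * recovery, unless the GPU is merely stalled on an SMMU fault.
 */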
1217static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1218{
1219        struct drm_device *dev = gpu->dev;
1220        struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1221
1222        /*
1223         * If stalled on SMMU fault, we could trip the GPU's hang detection,
1224         * but the fault handler will trigger the devcore dump, and we want
1225         * to otherwise resume normally rather than killing the submit, so
1226         * just bail.
1227         */
1228        if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
1229                return;
1230
1231        DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1232                ring ? ring->id : -1, ring ? ring->seqno : 0,
1233                gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1234                gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1235                gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1236                gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1237                gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1238                gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1239                gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1240
1241        /* Turn off the hangcheck timer to keep it from bothering us */
1242        del_timer(&gpu->hangcheck_timer);
1243
1244        kthread_queue_work(gpu->worker, &gpu->recover_work);
1245}
1246
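/* RBBM error interrupts that are handled together in a5xx_rbbm_err_irq() */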
1247#define RBBM_ERROR_MASK \
1248        (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1249        A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1250        A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1251        A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1252        A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1253        A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1254
1255static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1256{
1257        u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1258
1259        /*
1260         * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1261         * before the source is cleared the interrupt will storm.
1262         */
1263        gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1264                status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1265
1266        /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1267        if (status & RBBM_ERROR_MASK)
1268                a5xx_rbbm_err_irq(gpu, status);
1269
1270        if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1271                a5xx_cp_err_irq(gpu);
1272
1273        if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1274                a5xx_fault_detect_irq(gpu);
1275
1276        if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1277                a5xx_uche_err_irq(gpu);
1278
1279        if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1280                a5xx_gpmu_err_irq(gpu);
1281
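            /*
             * CP_CACHE_FLUSH_TS is raised once the CP has written back the
             * fence for a completed submit, so this is the point to retire
             * finished submits and let a pending preemption go ahead.
             */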
1282        if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1283                a5xx_preempt_trigger(gpu);
1284                msm_gpu_retire(gpu);
1285        }
1286
1287        if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1288                a5xx_preempt_irq(gpu);
1289
1290        return IRQ_HANDLED;
1291}
1292
1293static const u32 a5xx_registers[] = {
1294        0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1295        0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1296        0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1297        0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1298        0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1299        0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1300        0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1301        0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1302        0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1303        0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1304        0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1305        0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1306        0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1307        0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1308        0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1309        0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1310        0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1311        0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1312        0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1313        0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1314        0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1315        0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1316        0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1317        0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1318        0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1319        0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1320        0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1321        0xAC60, 0xAC60, ~0,
1322};
1323
1324static void a5xx_dump(struct msm_gpu *gpu)
1325{
1326        DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1327                gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1328        adreno_dump(gpu);
1329}
1330
1331static int a5xx_pm_resume(struct msm_gpu *gpu)
1332{
1333        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1334        int ret;
1335
1336        /* Turn on the core power */
1337        ret = msm_gpu_pm_resume(gpu);
1338        if (ret)
1339                return ret;
1340
1341        /* Adreno 508, 509, 510 and 512 need manual RBBM suspend/resume control */
1342        if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
1343                /* Halt the sp_input_clk at HM level */
1344                gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1345                a5xx_set_hwcg(gpu, true);
1346                /* Turn on sp_input_clk at HM level */
1347                gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1348                return 0;
1349        }
1350
1351        /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1352        gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1353
1354        /* Wait 3 usecs before polling */
1355        udelay(3);
1356
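            /*
             * Poll bit 20 of the RBCCU power/clock status, which the timeout
             * message below treats as the GDSC enable ack, for up to 20 usecs.
             */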
1357        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1358                (1 << 20), (1 << 20));
1359        if (ret) {
1360                DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1361                        gpu->name,
1362                        gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1363                return ret;
1364        }
1365
1366        /* Turn on the SP domain */
1367        gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1368        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1369                (1 << 20), (1 << 20));
1370        if (ret)
1371                DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1372                        gpu->name);
1373
1374        return ret;
1375}
1376
1377static int a5xx_pm_suspend(struct msm_gpu *gpu)
1378{
1379        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1380        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1381        u32 mask = 0xf;
1382        int i, ret;
1383
1384        /* A508, A510 have 3 XIN ports in VBIF */
1385        if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu))
1386                mask = 0x7;
1387
1388        /* Clear the VBIF pipe before shutting down */
1389        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1390        spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1391                                mask) == mask);
1392
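            /* Drop the halt request now that all XIN ports have acked it */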
1393        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1394
1395        /*
1396         * Reset the VBIF before power collapse to avoid issues with FIFO
1397         * entries on Adreno A510 and A530 (the others will tend to lock up)
1398         */
1399        if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
1400                gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1401                gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1402        }
1403
1404        ret = msm_gpu_pm_suspend(gpu);
1405        if (ret)
1406                return ret;
1407
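            /*
             * The rptr shadow values go stale across power collapse, so clear
             * the cached copies that a5xx_get_rptr() reports rather than
             * leaving old read pointers behind.
             */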
1408        if (a5xx_gpu->has_whereami)
1409                for (i = 0; i < gpu->nr_rings; i++)
1410                        a5xx_gpu->shadow[i] = 0;
1411
1412        return 0;
1413}
1414
1415static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1416{
1417        *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
1418                REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
1419
1420        return 0;
1421}
1422
1423struct a5xx_crashdumper {
1424        void *ptr;
1425        struct drm_gem_object *bo;
1426        u64 iova;
1427};
1428
1429struct a5xx_gpu_state {
1430        struct msm_gpu_state base;
1431        u32 *hlsqregs;
1432};
1433
1434static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1435                struct a5xx_crashdumper *dumper)
1436{
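            /*
             * A single 1MB write-combined buffer is shared with the CP: the
             * crashdump script is placed at the start and the captured
             * register data is written further into the same buffer.
             */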
1437        dumper->ptr = msm_gem_kernel_new(gpu->dev,
1438                SZ_1M, MSM_BO_WC, gpu->aspace,
1439                &dumper->bo, &dumper->iova);
1440
1441        if (!IS_ERR(dumper->ptr))
1442                msm_gem_object_set_name(dumper->bo, "crashdump");
1443
1444        return PTR_ERR_OR_ZERO(dumper->ptr);
1445}
1446
1447static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1448                struct a5xx_crashdumper *dumper)
1449{
1450        u32 val;
1451
1452        if (IS_ERR_OR_NULL(dumper->ptr))
1453                return -EINVAL;
1454
1455        gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1456                REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1457
1458        gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1459
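            /*
             * Poll CRASH_DUMP_CNTL for what appears to be a completion bit
             * (bit 2), checking every 100 usecs for up to 10 msecs.
             */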
1460        return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1461                val & 0x04, 100, 10000);
1462}
1463
1464/*
1465 * This is a list of the registers that need to be read through the HLSQ
1466 * aperture by the crashdumper.  These registers are not normally accessible
1467 * from the CPU on a secure platform.
1468 */
1469static const struct {
1470        u32 type;
1471        u32 regoffset;
1472        u32 count;
1473} a5xx_hlsq_aperture_regs[] = {
1474        { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1475        { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1476        { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1477        { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1478        { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1479        { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1480        { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1481        { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1482        { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1483        { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1484        { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1485        { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1486        { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1487        { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1488        { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1489};
1490
1491static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1492                struct a5xx_gpu_state *a5xx_state)
1493{
1494        struct a5xx_crashdumper dumper = { 0 };
1495        u32 offset, count = 0;
1496        u64 *ptr;
1497        int i;
1498
1499        if (a5xx_crashdumper_init(gpu, &dumper))
1500                return;
1501
1502        /* The script will be written at offset 0 */
1503        ptr = dumper.ptr;
1504
1505        /* Start writing the data at offset 256k */
1506        offset = dumper.iova + (256 * SZ_1K);
1507
1508        /* Count how many additional registers to get from the HLSQ aperture */
1509        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1510                count += a5xx_hlsq_aperture_regs[i].count;
1511
1512        a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1513        if (!a5xx_state->hlsqregs)
1514                return;
1515
1516        /* Build the crashdump script */
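            /*
             * Each script entry below is a pair of 64-bit words: the first
             * pair appears to write the bank select value into
             * HLSQ_DBG_READ_SEL (register offset packed into the upper bits),
             * and the second asks the dumper to copy 'c' dwords from the AHB
             * read aperture into the buffer at 'offset'.
             */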
1517        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1518                u32 type = a5xx_hlsq_aperture_regs[i].type;
1519                u32 c = a5xx_hlsq_aperture_regs[i].count;
1520
1521                /* Write the register to select the desired bank */
1522                *ptr++ = ((u64) type << 8);
1523                *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1524                        (1 << 21) | 1;
1525
1526                *ptr++ = offset;
1527                *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1528                        | c;
1529
1530                offset += c * sizeof(u32);
1531        }
1532
1533        /* Write two zeros to close off the script */
1534        *ptr++ = 0;
1535        *ptr++ = 0;
1536
1537        if (a5xx_crashdumper_run(gpu, &dumper)) {
1538                kfree(a5xx_state->hlsqregs);
1539                msm_gem_kernel_put(dumper.bo, gpu->aspace);
1540                return;
1541        }
1542
1543        /* Copy the data from the crashdumper to the state */
1544        memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1545                count * sizeof(u32));
1546
1547        msm_gem_kernel_put(dumper.bo, gpu->aspace);
1548}
1549
1550static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1551{
1552        struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1553                        GFP_KERNEL);
1554        bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
1555
1556        if (!a5xx_state)
1557                return ERR_PTR(-ENOMEM);
1558
1559        /* Temporarily disable hardware clock gating before reading the hw */
1560        a5xx_set_hwcg(gpu, false);
1561
1562        /* First get the generic state from the adreno core */
1563        adreno_gpu_state_get(gpu, &(a5xx_state->base));
1564
1565        a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1566
1567        /*
1568         * Get the HLSQ regs with the help of the crashdumper, but only if
1569         * we are not stalled in an iommu fault (in which case the crashdumper
1570         * would not have access to memory)
1571         */
1572        if (!stalled)
1573                a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1574
1575        a5xx_set_hwcg(gpu, true);
1576
1577        return &a5xx_state->base;
1578}
1579
1580static void a5xx_gpu_state_destroy(struct kref *kref)
1581{
1582        struct msm_gpu_state *state = container_of(kref,
1583                struct msm_gpu_state, ref);
1584        struct a5xx_gpu_state *a5xx_state = container_of(state,
1585                struct a5xx_gpu_state, base);
1586
1587        kfree(a5xx_state->hlsqregs);
1588
1589        adreno_gpu_state_destroy(state);
1590        kfree(a5xx_state);
1591}
1592
1593static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1594{
1595        if (IS_ERR_OR_NULL(state))
1596                return 1;
1597
1598        return kref_put(&state->ref, a5xx_gpu_state_destroy);
1599}
1600
1601
1602#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1603static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1604                      struct drm_printer *p)
1605{
1606        int i, j;
1607        u32 pos = 0;
1608        struct a5xx_gpu_state *a5xx_state = container_of(state,
1609                struct a5xx_gpu_state, base);
1610
1611        if (IS_ERR_OR_NULL(state))
1612                return;
1613
1614        adreno_show(gpu, state, p);
1615
1616        /* Dump the additional a5xx HLSQ registers */
1617        if (!a5xx_state->hlsqregs)
1618                return;
1619
1620        drm_printf(p, "registers-hlsq:\n");
1621
1622        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1623                u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1624                u32 c = a5xx_hlsq_aperture_regs[i].count;
1625
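                    /*
                     * The table offsets are in dwords, so shift by two to
                     * report byte offsets.
                     */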
1626                for (j = 0; j < c; j++, pos++, o++) {
1627                        /*
1628                         * To keep the crashdump simple we pull the entire range
1629                         * for each register type, but not all of the registers
1630                         * in the range are valid. Fortunately, invalid registers
1631                         * stick out like a sore thumb with a value of
1632                         * 0xdeadbeef.
1633                         */
1634                        if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1635                                continue;
1636
1637                        drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1638                                o << 2, a5xx_state->hlsqregs[pos]);
1639                }
1640        }
1641}
1642#endif
1643
1644static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1645{
1646        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1647        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1648
1649        return a5xx_gpu->cur_ring;
1650}
1651
1652static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1653{
1654        u64 busy_cycles, busy_time;
1655
1656        /* Only read the GPU busy counter if the hardware is already active */
1657        if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1658                return 0;
1659
1660        busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1661                        REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1662
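            /*
             * Convert the cycle delta to busy time by dividing by the core
             * clock rate in MHz (i.e. cycles per microsecond).
             */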
1663        busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1664        do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1665
1666        gpu->devfreq.busy_cycles = busy_cycles;
1667
1668        pm_runtime_put(&gpu->pdev->dev);
1669
1670        if (WARN_ON(busy_time > ~0LU))
1671                return ~0LU;
1672
1673        return (unsigned long)busy_time;
1674}
1675
1676static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1677{
1678        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1679        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1680
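            /*
             * Prefer the rptr shadow that the CP keeps up to date via
             * CP_WHERE_AM_I; otherwise fall back to reading the read pointer
             * register directly.
             */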
1681        if (a5xx_gpu->has_whereami)
1682                return a5xx_gpu->shadow[ring->id];
1683
1684        return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1685}
1686
1687static const struct adreno_gpu_funcs funcs = {
1688        .base = {
1689                .get_param = adreno_get_param,
1690                .hw_init = a5xx_hw_init,
1691                .pm_suspend = a5xx_pm_suspend,
1692                .pm_resume = a5xx_pm_resume,
1693                .recover = a5xx_recover,
1694                .submit = a5xx_submit,
1695                .active_ring = a5xx_active_ring,
1696                .irq = a5xx_irq,
1697                .destroy = a5xx_destroy,
1698#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1699                .show = a5xx_show,
1700#endif
1701#if defined(CONFIG_DEBUG_FS)
1702                .debugfs_init = a5xx_debugfs_init,
1703#endif
1704                .gpu_busy = a5xx_gpu_busy,
1705                .gpu_state_get = a5xx_gpu_state_get,
1706                .gpu_state_put = a5xx_gpu_state_put,
1707                .create_address_space = adreno_iommu_create_address_space,
1708                .get_rptr = a5xx_get_rptr,
1709        },
1710        .get_timestamp = a5xx_get_timestamp,
1711};
1712
1713static void check_speed_bin(struct device *dev)
1714{
1715        struct nvmem_cell *cell;
1716        u32 val;
1717
1718        /*
1719         * If the OPP table specifies an opp-supported-hw property then we have
1720         * to set something with dev_pm_opp_set_supported_hw() or the table
1721         * doesn't get populated, so pick an arbitrary value that should
1722         * ensure the default frequencies are selected but won't conflict with
1723         * any actual bins.
1724         */
1725        val = 0x80;
1726
1727        cell = nvmem_cell_get(dev, "speed_bin");
1728
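            /*
             * If the fuse is readable, advertise only the blown bin: the OPP
             * core enables an entry when its opp-supported-hw value ANDed
             * with this mask is non-zero, so bit N selects speed bin N.
             */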
1729        if (!IS_ERR(cell)) {
1730                void *buf = nvmem_cell_read(cell, NULL);
1731
1732                if (!IS_ERR(buf)) {
1733                        u8 bin = *((u8 *) buf);
1734
1735                        val = (1 << bin);
1736                        kfree(buf);
1737                }
1738
1739                nvmem_cell_put(cell);
1740        }
1741
1742        devm_pm_opp_set_supported_hw(dev, &val, 1);
1743}
1744
1745struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1746{
1747        struct msm_drm_private *priv = dev->dev_private;
1748        struct platform_device *pdev = priv->gpu_pdev;
1749        struct a5xx_gpu *a5xx_gpu = NULL;
1750        struct adreno_gpu *adreno_gpu;
1751        struct msm_gpu *gpu;
1752        int ret;
1753
1754        if (!pdev) {
1755                DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1756                return ERR_PTR(-ENXIO);
1757        }
1758
1759        a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1760        if (!a5xx_gpu)
1761                return ERR_PTR(-ENOMEM);
1762
1763        adreno_gpu = &a5xx_gpu->base;
1764        gpu = &adreno_gpu->base;
1765
1766        adreno_gpu->registers = a5xx_registers;
1767
1768        a5xx_gpu->lm_leakage = 0x4E001A;
1769
1770        check_speed_bin(&pdev->dev);
1771
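            /*
             * The final argument asks for four ringbuffers so that preemption
             * can switch between per-priority rings.
             */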
1772        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1773        if (ret) {
1774                a5xx_destroy(&(a5xx_gpu->base.base));
1775                return ERR_PTR(ret);
1776        }
1777
1778        if (gpu->aspace)
1779                msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1780
1781        /* Set up the preemption-specific bits and pieces for each ringbuffer */
1782        a5xx_preempt_init(gpu);
1783
1784        return gpu;
1785}
1786