linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

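/*
 * Per submit, the ring receives: a CP_INDIRECT_BUFFER_PFE packet per
 * command buffer, the submit seqno written to a CP scratch register
 * (for hang diagnostics), an HLSQ flush, a wait-for-idle, and finally
 * a CACHE_FLUSH_TS event that lands the seqno in the fence location
 * and raises the CACHE_FLUSH_TS IRQ which retires the submit.
 */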
static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}
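/*
 * The OUT_PKT* helpers (adreno_gpu.h) emit the three legacy PM4 packet
 * types used above, roughly:
 *
 *	OUT_PKT0(ring, reg, cnt);     // write 'cnt' registers starting at 'reg'
 *	OUT_PKT2(ring);               // no-op packet, used as padding
 *	OUT_PKT3(ring, opcode, cnt);  // opcode packet with 'cnt' payload dwords
 *
 * each followed by its payload dwords via OUT_RING().
 */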

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: the GPU to program
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);

	/* Early A430's have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}
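/*
 * The _CTL*, _HYST* and _DELAY* writes above appear to be per-block
 * hardware clock gating controls (gate enables, hysteresis counts and
 * enable delays), with the final RBBM_CLOCK_CTL write acting as the
 * top-level enable; the magic values are presumably vendor bring-up
 * settings.
 */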

static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
	return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
	}

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;

		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}
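/*
 * Bring-up order above: VBIF/QoS arbitration, idle/hang detection and
 * error reporting, GMEM base, perf counters, UCHE trap base, HW clock
 * gating, CP register protection, IRQ unmask, ringbuffer setup, PM4
 * and PFP microcode load, then un-halting the ME and running CP_ME_INIT.
 */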

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}
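/*
 * The 1 -> 0 sequence on RBBM_SW_RESET_CMD pulses the GPU soft reset;
 * the read back in between forces the posted write out to the hardware
 * before the reset bit is cleared.
 */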

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}
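/*
 * spin_until() (adreno_gpu.h) busy-waits on the given condition with a
 * fixed timeout, returning nonzero if the condition never became true.
 */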

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};
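/*
 * Both tables above are inclusive (start, end) register-range pairs,
 * terminated by a ~0 sentinel; the common adreno dump/state code walks
 * them to snapshot the registers.
 */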

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;

		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}

	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}

	return 0;
}
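/*
 * Bit 0 of RBBM_POWER_CNTL_IP is SW_COLLAPSE: resume clears it
 * (0x778000) and polls until SP/TP power is reported on, while suspend
 * sets it (0x778001) so the SP/TP rail can collapse.
 */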

static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}
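/*
 * This samples the CP_0 performance counter, which a4xx_hw_init()
 * pointed at the always-on CP_ALWAYS_COUNT countable, so it serves as
 * a free-running timestamp.
 */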

static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}
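/*
 * Since a4xx_hw_init() sets AXXX_CP_RB_CNTL_NO_UPDATE, the CP never
 * publishes its read pointer to memory; sample it from the register
 * and cache it in the memptrs for the common ringbuffer code.
 */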

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = a4xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a4xx_get_rptr,
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;

	/* if needed, allocate gmem: */
	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
				    &a4xx_gpu->ocmem);
	if (ret)
		goto fail;

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}