/* linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c */
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
   3 */
   4#include "a4xx_gpu.h"
   5
/*
 * Interrupt sources unmasked at hw_init time: the RBBM/CP error
 * conditions (for hang/fault reporting) plus CACHE_FLUSH_TS, which
 * fires when a timestamp/fence write lands and drives retire processing.
 */
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)
  20
  21extern bool hang_debug;
  22static void a4xx_dump(struct msm_gpu *gpu);
  23static bool a4xx_idle(struct msm_gpu *gpu);
  24
/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The GPU to program (note: was documented as @device; the actual
 *       parameter is the msm_gpu pointer)
 *
 * Programs the per-block hardware clock-gating (HWCG) control, hysteresis
 * and delay registers with fixed values taken from the downstream driver,
 * then enables gating via RBBM_CLOCK_CTL (except on early A430 parts).
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;
	/* TP (texture pipe) clock gating, one register set per pipe: */
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	/* SP (shader processor) clock gating: */
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	/* UCHE (unified cache): */
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	/* RB (render backend): */
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	/* Remaining shared blocks (GPC, COM/DCOM, TSE/RAS/RBBM, HLSQ): */
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/* Early A430's have a timing issue with SP/TP power collapse;
	   disabling HW clock gating prevents it. */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}
 107
 108
 109static bool a4xx_me_init(struct msm_gpu *gpu)
 110{
 111        struct msm_ringbuffer *ring = gpu->rb[0];
 112
 113        OUT_PKT3(ring, CP_ME_INIT, 17);
 114        OUT_RING(ring, 0x000003f7);
 115        OUT_RING(ring, 0x00000000);
 116        OUT_RING(ring, 0x00000000);
 117        OUT_RING(ring, 0x00000000);
 118        OUT_RING(ring, 0x00000080);
 119        OUT_RING(ring, 0x00000100);
 120        OUT_RING(ring, 0x00000180);
 121        OUT_RING(ring, 0x00006600);
 122        OUT_RING(ring, 0x00000150);
 123        OUT_RING(ring, 0x0000014e);
 124        OUT_RING(ring, 0x00000154);
 125        OUT_RING(ring, 0x00000001);
 126        OUT_RING(ring, 0x00000000);
 127        OUT_RING(ring, 0x00000000);
 128        OUT_RING(ring, 0x00000000);
 129        OUT_RING(ring, 0x00000000);
 130        OUT_RING(ring, 0x00000000);
 131
 132        gpu->funcs->flush(gpu, ring);
 133        return a4xx_idle(gpu);
 134}
 135
/*
 * One-time (per power-up) hardware initialization: VBIF/QoS setup,
 * error reporting, hang detection, protected-register ranges, clock
 * gating, ucode load, and finally the ME_INIT handshake.
 * Returns 0 on success or a negative errno.
 */
static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	/* Per-SKU VBIF (bus interface) arbitration/QoS programming: */
	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		/* only a405/a420/a430 are bound to this driver path */
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hystersis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
	}

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting*/
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters*/
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	/* Point GMEM accesses at the OCMEM carveout (base in 16KB units) */
	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;
		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);


	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* Load PM4: (ptr[0] is the ucode version, upload starts at [1]) */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}
 302
 303static void a4xx_recover(struct msm_gpu *gpu)
 304{
 305        int i;
 306
 307        adreno_dump_info(gpu);
 308
 309        for (i = 0; i < 8; i++) {
 310                printk("CP_SCRATCH_REG%d: %u\n", i,
 311                        gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
 312        }
 313
 314        /* dump registers before resetting gpu, if enabled: */
 315        if (hang_debug)
 316                a4xx_dump(gpu);
 317
 318        gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
 319        gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
 320        gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
 321        adreno_recover(gpu);
 322}
 323
 324static void a4xx_destroy(struct msm_gpu *gpu)
 325{
 326        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 327        struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
 328
 329        DBG("%s", gpu->name);
 330
 331        adreno_gpu_cleanup(adreno_gpu);
 332
 333        adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);
 334
 335        kfree(a4xx_gpu);
 336}
 337
 338static bool a4xx_idle(struct msm_gpu *gpu)
 339{
 340        /* wait for ringbuffer to drain: */
 341        if (!adreno_idle(gpu, gpu->rb[0]))
 342                return false;
 343
 344        /* then wait for GPU to finish: */
 345        if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
 346                                        A4XX_RBBM_STATUS_GPU_BUSY))) {
 347                DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
 348                /* TODO maybe we need to reset GPU here to recover from hang? */
 349                return false;
 350        }
 351
 352        return true;
 353}
 354
 355static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
 356{
 357        uint32_t status;
 358
 359        status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
 360        DBG("%s: Int status %08x", gpu->name, status);
 361
 362        if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
 363                uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
 364                printk("CP | Protected mode error| %s | addr=%x\n",
 365                        reg & (1 << 24) ? "WRITE" : "READ",
 366                        (reg & 0xFFFFF) >> 2);
 367        }
 368
 369        gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
 370
 371        msm_gpu_retire(gpu);
 372
 373        return IRQ_HANDLED;
 374}
 375
/*
 * Register ranges to capture on dump/crash for a420/a430, as
 * (start, end) pairs of dword offsets, terminated by ~0.
 */
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};
 457
/*
 * Register ranges to capture on dump/crash for a405 (no CCU/XPU/VMIDMT
 * and a different VBIF layout), as (start, end) pairs terminated by ~0.
 */
static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000*/
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};
 503
 504static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
 505{
 506        struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 507
 508        if (!state)
 509                return ERR_PTR(-ENOMEM);
 510
 511        adreno_gpu_state_get(gpu, state);
 512
 513        state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);
 514
 515        return state;
 516}
 517
/* Register offset defines for A4XX, in order of enum adreno_regs.
 * The _HI halves are skipped because a4xx ringbuffer registers are
 * 32-bit only. */
static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
};
 528
 529static void a4xx_dump(struct msm_gpu *gpu)
 530{
 531        printk("status:   %08x\n",
 532                        gpu_read(gpu, REG_A4XX_RBBM_STATUS));
 533        adreno_dump(gpu);
 534}
 535
 536static int a4xx_pm_resume(struct msm_gpu *gpu) {
 537        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 538        int ret;
 539
 540        ret = msm_gpu_pm_resume(gpu);
 541        if (ret)
 542                return ret;
 543
 544        if (adreno_is_a430(adreno_gpu)) {
 545                unsigned int reg;
 546                /* Set the default register values; set SW_COLLAPSE to 0 */
 547                gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
 548                do {
 549                        udelay(5);
 550                        reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
 551                } while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
 552        }
 553        return 0;
 554}
 555
 556static int a4xx_pm_suspend(struct msm_gpu *gpu) {
 557        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 558        int ret;
 559
 560        ret = msm_gpu_pm_suspend(gpu);
 561        if (ret)
 562                return ret;
 563
 564        if (adreno_is_a430(adreno_gpu)) {
 565                /* Set the default register values; set SW_COLLAPSE to 1 */
 566                gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
 567        }
 568        return 0;
 569}
 570
 571static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 572{
 573        *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
 574                REG_A4XX_RBBM_PERFCTR_CP_0_HI);
 575
 576        return 0;
 577}
 578
/*
 * a4xx ops table: a4xx-specific hooks where the hardware differs,
 * generic adreno implementations for the rest.
 */
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
	},
	.get_timestamp = a4xx_get_timestamp,
};
 600
/*
 * Probe-time constructor for the a4xx GPU.  Allocates the a4xx_gpu
 * wrapper, runs common adreno init, sets up register tables, OCMEM,
 * and bus bandwidth.  Returns the msm_gpu pointer on success or an
 * ERR_PTR; on any failure a4xx_destroy() tears down whatever was
 * initialized so far.
 */
struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	/* a4xx exposes no CPU-sampled perf counters through this interface */
	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* a405 has a reduced dump-register set (different VBIF, no CCU) */
	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;
	adreno_gpu->reg_offsets = a4xx_register_offsets;

	/* if needed, allocate gmem: (backed by OCMEM on a4xx) */
	if (adreno_is_a4xx(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
					    &a4xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	/* a4xx_destroy copes with a partially-initialized a4xx_gpu */
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}
 673