linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

#define A4XX_INT0_MASK \
        (A4XX_INT0_RBBM_AHB_ERROR |        \
         A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
         A4XX_INT0_CP_T0_PACKET_IN_IB |    \
         A4XX_INT0_CP_OPCODE_ERROR |       \
         A4XX_INT0_CP_RESERVED_BIT_ERROR | \
         A4XX_INT0_CP_HW_FAULT |           \
         A4XX_INT0_CP_IB1_INT |            \
         A4XX_INT0_CP_IB2_INT |            \
         A4XX_INT0_CP_RB_INT |             \
         A4XX_INT0_CP_REG_PROTECT_FAULT |  \
         A4XX_INT0_CP_AHB_ERROR_HALT |     \
         A4XX_INT0_CACHE_FLUSH_TS |        \
         A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

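/*
 * Command submission: emit each cmdstream buffer as an indirect buffer
 * (IB), skipping context-restore buffers when there has been no context
 * switch, then write the fence seqno and a CACHE_FLUSH_TS event so the
 * IRQ handler can retire the submit.
 */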
static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
        struct msm_drm_private *priv = gpu->dev->dev_private;
        struct msm_ringbuffer *ring = submit->ring;
        unsigned int i;

        for (i = 0; i < submit->nr_cmds; i++) {
                switch (submit->cmd[i].type) {
                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
                        /* ignore IB-targets */
                        break;
                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
                        /* ignore if there has not been a ctx switch: */
                        if (priv->lastctx == submit->queue->ctx)
                                break;
                        fallthrough;
                case MSM_SUBMIT_CMD_BUF:
                        OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
                        OUT_RING(ring, submit->cmd[i].size);
                        OUT_PKT2(ring);
                        break;
                }
        }

        OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
        OUT_RING(ring, submit->seqno);

        /* Flush HLSQ lazy updates to make sure there is nothing
         * pending for indirect loads after the timestamp has
         * passed:
         */
        OUT_PKT3(ring, CP_EVENT_WRITE, 1);
        OUT_RING(ring, HLSQ_FLUSH);

        /* wait for idle before cache flush/interrupt */
        OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
        OUT_RING(ring, 0x00000000);

        /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
        OUT_PKT3(ring, CP_EVENT_WRITE, 3);
        OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
        OUT_RING(ring, rbmemptr(ring, fence));
        OUT_RING(ring, submit->seqno);

        adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The msm_gpu pointer
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        unsigned int i;

        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

        /* Disable L1 clocking in A420 due to CCU issues with it */
        for (i = 0; i < 4; i++) {
                if (adreno_is_a420(adreno_gpu)) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
                                        0x00002020);
                } else {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
                                        0x00022020);
                }
        }

        /* No CCU for A405 */
        if (!adreno_is_a405(adreno_gpu)) {
                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
                                        0x00000922);
                }

                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
                                        0x00000000);
                }

                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
                                        0x00000001);
                }
        }

        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
        /*
         * Early A430s have a timing issue with SP/TP power collapse;
         * disabling HW clock gating prevents it.
         */
        if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
        else
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

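/*
 * Initialize the CP micro engine (ME) with the standard CP_ME_INIT
 * packet, then wait for the GPU to idle before declaring it up.
 */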
static bool a4xx_me_init(struct msm_gpu *gpu)
{
        struct msm_ringbuffer *ring = gpu->rb[0];

        OUT_PKT3(ring, CP_ME_INIT, 17);
        OUT_RING(ring, 0x000003f7);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000080);
        OUT_RING(ring, 0x00000100);
        OUT_RING(ring, 0x00000180);
        OUT_RING(ring, 0x00006600);
        OUT_RING(ring, 0x00000150);
        OUT_RING(ring, 0x0000014e);
        OUT_RING(ring, 0x00000154);
        OUT_RING(ring, 0x00000001);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);

        adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
        return a4xx_idle(gpu);
}

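/*
 * Bring the GPU up: program per-variant VBIF settings, error reporting,
 * hang detection, clock gating and CP register protection, then load
 * the PM4/PFP microcode and start the micro engine.
 */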
static int a4xx_hw_init(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
        uint32_t *ptr, len;
        int i, ret;

        if (adreno_is_a405(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else if (adreno_is_a420(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
                gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
                gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else if (adreno_is_a430(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else {
                BUG();
        }

        /* Make all blocks contribute to the GPU BUSY perf counter */
        gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

        /* Tune the hysteresis counters for SP and CP idle detection */
        gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
        gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

        if (adreno_is_a430(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
        }

        /* Enable the RBBM error reporting bits */
        gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

        /* Enable AHB error reporting */
        gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

        /* Enable power counters */
        gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

        /*
         * Turn on hang detection - this spews a lot of useful information
         * into the RBBM registers on a hang:
         */
        gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
                        (1 << 30) | 0xFFFF);

        gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
                        (unsigned int)(a4xx_gpu->ocmem.base >> 14));

        /* Turn on performance counters: */
        gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

        /* use the first CP counter for timestamp queries.. userspace may set
         * this as well but it selects the same counter/countable:
         */
        gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

        if (adreno_is_a430(adreno_gpu))
                gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

        /* Disable L2 bypass to avoid UCHE out of bounds errors */
        gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
        gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

        gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
                        (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

        /* On A430 enable SP regfile sleep for power savings */
        /* TODO downstream does this for !420, so maybe applies for 405 too? */
        if (!adreno_is_a420(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
                        0x00000441);
                gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
                        0x00000441);
        }

        a4xx_enable_hwcg(gpu);

        /*
         * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
         * due to timing issue with HLSQ_TP_CLK_EN
         */
        if (adreno_is_a420(adreno_gpu)) {
                unsigned int val;

                val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
                val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
                val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
        }

        /* setup access protection: */
        gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

        /* RBBM registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

        /* CP registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

        /* RB registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

        /* HLSQ registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

        /* VPC registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

        /* SMMU registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

        gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

        ret = adreno_hw_init(gpu);
        if (ret)
                return ret;

        /*
         * Use the default ringbuffer size and block size but disable the RPTR
         * shadow
         */
        gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
                MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

        /* Set the ringbuffer address */
        gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

        /* Load PM4: */
        ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
        len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
        DBG("loading PM4 ucode version: %u", ptr[0]);
        gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

        /* Load PFP: */
        ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
        len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
        DBG("loading PFP ucode version: %u", ptr[0]);

        gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

        /* clear ME_HALT to start micro engine */
        gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

        return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

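/*
 * Hang recovery: dump state for debugging, pulse RBBM_SW_RESET_CMD to
 * reset the core (the read back is presumably a posting read so the
 * write lands before the reset bit is cleared), then let the common
 * adreno code restart the GPU.
 */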
static void a4xx_recover(struct msm_gpu *gpu)
{
        int i;

        adreno_dump_info(gpu);

        for (i = 0; i < 8; i++) {
                printk("CP_SCRATCH_REG%d: %u\n", i,
                        gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
        }

        /* dump registers before resetting gpu, if enabled: */
        if (hang_debug)
                a4xx_dump(gpu);

        gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
        gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
        gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
        adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

        DBG("%s", gpu->name);

        adreno_gpu_cleanup(adreno_gpu);

        adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

        kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
        /* wait for ringbuffer to drain: */
        if (!adreno_idle(gpu, gpu->rb[0]))
                return false;

        /* then wait for GPU to finish: */
        if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
                                        A4XX_RBBM_STATUS_GPU_BUSY))) {
                DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
                /* TODO maybe we need to reset GPU here to recover from hang? */
                return false;
        }

        return true;
}

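/*
 * IRQ handler: read the interrupt status, decode protected mode faults
 * for logging, clear the status bits, and retire completed submits.
 */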
static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
        uint32_t status;

        status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
        DBG("%s: Int status %08x", gpu->name, status);

        if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
                uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

                printk("CP | Protected mode error | %s | addr=%x\n",
                        reg & (1 << 24) ? "WRITE" : "READ",
                        (reg & 0xFFFFF) >> 2);
        }

        gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

        msm_gpu_retire(gpu);

        return IRQ_HANDLED;
}

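/*
 * Registers captured for debugfs dumps and crash state; each pair is an
 * inclusive start/end range of register offsets, terminated by the ~0
 * sentinel.
 */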
static const unsigned int a4xx_registers[] = {
        /* RBBM */
        0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
        0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
        0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
        /* CP */
        0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
        0x0578, 0x058F,
        /* VSC */
        0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
        /* GRAS */
        0x0C80, 0x0C81, 0x0C88, 0x0C8F,
        /* RB */
        0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
        /* PC */
        0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
        /* VFD */
        0x0E40, 0x0E4A,
        /* VPC */
        0x0E60, 0x0E61, 0x0E63, 0x0E68,
        /* UCHE */
        0x0E80, 0x0E84, 0x0E88, 0x0E95,
        /* VMIDMT */
        0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
        0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
        0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
        0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
        0x1380, 0x1380,
        /* GRAS CTX 0 */
        0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
        /* PC CTX 0 */
        0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
        /* VFD CTX 0 */
        0x2200, 0x2204, 0x2208, 0x22A9,
        /* GRAS CTX 1 */
        0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
        /* PC CTX 1 */
        0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
        /* VFD CTX 1 */
        0x2600, 0x2604, 0x2608, 0x26A9,
        /* XPU */
        0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
        0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
        0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
        /* VBIF */
        0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
        0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
        0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
        0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
        0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
        0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
        0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
        0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
        0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
        0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
        0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
        0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
        0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
        0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
        0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
        0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
        0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
        0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
        0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
        0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
        0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
        0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
        0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
        0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
        0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
        0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
        0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
        0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
        0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
        0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
        0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
        0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
        0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
        0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
        ~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
        /* RBBM */
        0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
        0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
        0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
        /* CP */
        0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
        0x0578, 0x058F,
        /* VSC */
        0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
        /* GRAS */
        0x0C80, 0x0C81, 0x0C88, 0x0C8F,
        /* RB */
        0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
        /* PC */
        0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
        /* VFD */
        0x0E40, 0x0E4A,
        /* VPC */
        0x0E60, 0x0E61, 0x0E63, 0x0E68,
        /* UCHE */
        0x0E80, 0x0E84, 0x0E88, 0x0E95,
        /* GRAS CTX 0 */
        0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
        /* PC CTX 0 */
        0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
        /* VFD CTX 0 */
        0x2200, 0x2204, 0x2208, 0x22A9,
        /* GRAS CTX 1 */
        0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
        /* PC CTX 1 */
        0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
        /* VFD CTX 1 */
        0x2600, 0x2604, 0x2608, 0x26A9,
        /* VBIF version 0x20050000 */
        0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
        0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
        0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
        0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
        0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
        0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
        0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
        0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
        ~0 /* sentinel */
};

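/*
 * Snapshot GPU state for devcoredump/debugfs: the common adreno capture
 * plus the a4xx RBBM status register.
 */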
static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
        struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

        if (!state)
                return ERR_PTR(-ENOMEM);

        adreno_gpu_state_get(gpu, state);

        state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

        return state;
}

static void a4xx_dump(struct msm_gpu *gpu)
{
        printk("status:   %08x\n",
                        gpu_read(gpu, REG_A4XX_RBBM_STATUS));
        adreno_dump(gpu);
}

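/*
 * On a430 the SP/TP rails are software-collapsed across suspend, so
 * resume clears SW_COLLAPSE and polls until the hardware reports the
 * rails powered back on before the GPU is touched.
 */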
static int a4xx_pm_resume(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int ret;

        ret = msm_gpu_pm_resume(gpu);
        if (ret)
                return ret;

        if (adreno_is_a430(adreno_gpu)) {
                unsigned int reg;

                /* Set the default register values; set SW_COLLAPSE to 0 */
                gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
                do {
                        udelay(5);
                        reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
                } while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
        }
        return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int ret;

        ret = msm_gpu_pm_suspend(gpu);
        if (ret)
                return ret;

        if (adreno_is_a430(adreno_gpu)) {
                /* Set the default register values; set SW_COLLAPSE to 1 */
                gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
        }
        return 0;
}

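/*
 * The timestamp is the free-running ALWAYS_COUNT CP performance counter
 * selected via CP_PERFCTR_CP_SEL_0 in a4xx_hw_init().
 */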
static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
        *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
                REG_A4XX_RBBM_PERFCTR_CP_0_HI);

        return 0;
}

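/*
 * The RPTR shadow is disabled (AXXX_CP_RB_CNTL_NO_UPDATE), so the read
 * pointer must be fetched from the hardware register on demand.
 */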
static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
        ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
        return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
        .base = {
                .get_param = adreno_get_param,
                .hw_init = a4xx_hw_init,
                .pm_suspend = a4xx_pm_suspend,
                .pm_resume = a4xx_pm_resume,
                .recover = a4xx_recover,
                .submit = a4xx_submit,
                .active_ring = adreno_active_ring,
                .irq = a4xx_irq,
                .destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
                .show = adreno_show,
#endif
                .gpu_state_get = a4xx_gpu_state_get,
                .gpu_state_put = adreno_gpu_state_put,
                .create_address_space = adreno_iommu_create_address_space,
                .get_rptr = a4xx_get_rptr,
        },
        .get_timestamp = a4xx_get_timestamp,
};

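/*
 * Probe-time constructor: allocate the a4xx_gpu wrapper, run common
 * adreno init, claim OCMEM for GMEM where needed, and vote the
 * interconnect paths to full speed.
 */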
struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
        struct a4xx_gpu *a4xx_gpu = NULL;
        struct adreno_gpu *adreno_gpu;
        struct msm_gpu *gpu;
        struct msm_drm_private *priv = dev->dev_private;
        struct platform_device *pdev = priv->gpu_pdev;
        struct icc_path *ocmem_icc_path;
        struct icc_path *icc_path;
        int ret;

        if (!pdev) {
                DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
                ret = -ENXIO;
                goto fail;
        }

        a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
        if (!a4xx_gpu) {
                ret = -ENOMEM;
                goto fail;
        }

        adreno_gpu = &a4xx_gpu->base;
        gpu = &adreno_gpu->base;

        gpu->perfcntrs = NULL;
        gpu->num_perfcntrs = 0;

        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
        if (ret)
                goto fail;

        adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
                                                             a4xx_registers;

        /* if needed, allocate gmem: */
        ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
                                    &a4xx_gpu->ocmem);
        if (ret)
                goto fail;

        if (!gpu->aspace) {
                /* TODO we think it is possible to configure the GPU to
                 * restrict access to VRAM carveout.  But the required
                 * registers are unknown.  For now just bail out and
                 * limp along with just modesetting.  If it turns out
                 * to not be possible to restrict access, then we must
                 * implement a cmdstream validator.
                 */
                DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
                if (!allow_vram_carveout) {
                        ret = -ENXIO;
                        goto fail;
                }
        }

        icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
        if (IS_ERR(icc_path)) {
                ret = PTR_ERR(icc_path);
                goto fail;
        }

        ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
        if (IS_ERR(ocmem_icc_path)) {
                ret = PTR_ERR(ocmem_icc_path);
                /* allow -ENODATA, ocmem icc is optional */
                if (ret != -ENODATA)
                        goto fail;
                ocmem_icc_path = NULL;
        }

        /*
         * Set the ICC path to maximum speed for now by multiplying the fastest
         * frequency by the bus width (8). We'll want to scale this later on to
         * improve battery life.
         */
        icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
        icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

        return gpu;

fail:
        if (a4xx_gpu)
                a4xx_destroy(&a4xx_gpu->base.base);

        return ERR_PTR(ret);
}