linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

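/*
 * Write a submit into the ringbuffer: emit the userspace IBs, write the
 * seqno to a scratch register, and finish with a CACHE_FLUSH_TS event
 * that updates the fence memptr and raises the CACHE_FLUSH_TS IRQ:
 */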
static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

#if 0
	/* Dummy set-constant to trigger context rollover */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
	OUT_RING(ring, 0x00000000);
#endif

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

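/*
 * Bring up the CP micro engine with a CP_ME_INIT packet, then wait for
 * the GPU to go idle:
 */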
static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
	return a3xx_idle(gpu);
}

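/*
 * Hardware initialization: per-variant VBIF setup, error reporting and
 * hang detection, CP protect registers, PM4/PFP ucode load, and finally
 * CP/ME bring-up via a3xx_me_init():
 */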
static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating.  This allows AXI to run at a
		 * higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem.hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this (value taken from downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

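/* Dump some debug state, then soft-reset the GPU and re-run recovery: */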
static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

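/* Release the base adreno state, the OCMEM handle, and the a3xx_gpu itself: */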
static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

	kfree(a3xx_gpu);
}

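/* Wait for the ringbuffer to drain and for RBBM to report the GPU idle: */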
static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

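/*
 * IRQ handler: read and clear the RBBM interrupt status, then let the
 * core retire completed submits:
 */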
static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

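/*
 * Register ranges (pairs of first/last offsets, ~0 terminated) dumped
 * when capturing or printing GPU state:
 */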
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

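/* Capture the common adreno GPU state plus the a3xx RBBM status register: */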
static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

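/*
 * The RPTR shadow is disabled in a3xx_hw_init(), so read the read pointer
 * directly from the CP register:
 */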
static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

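/* a3xx entrypoints and common adreno helpers hooked into the msm_gpu core: */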
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = a3xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a3xx_get_rptr,
	},
};

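/*
 * SP perf counters enabled in a3xx_hw_init(): select register, LO readback
 * register, select value and name:
 */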
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

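/*
 * Probe-time constructor: allocate the a3xx_gpu, register perf counters
 * and the register dump ranges, initialize the base adreno GPU, set up
 * OCMEM for a330, and vote the interconnect paths to maximum bandwidth:
 */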
struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
					    adreno_gpu, &a3xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}