linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
   1/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
   2 *
   3 * This program is free software; you can redistribute it and/or modify
   4 * it under the terms of the GNU General Public License version 2 and
   5 * only version 2 as published by the Free Software Foundation.
   6 *
   7 * This program is distributed in the hope that it will be useful,
   8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
   9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  10 * GNU General Public License for more details.
  11 *
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/types.h>
  16#include <linux/cpumask.h>
  17#include <linux/qcom_scm.h>
  18#include <linux/dma-mapping.h>
  19#include <linux/of_address.h>
  20#include <linux/soc/qcom/mdt_loader.h>
  21#include <linux/pm_opp.h>
  22#include <linux/nvmem-consumer.h>
  23#include <linux/iopoll.h>
  24#include <linux/slab.h>
  25#include "msm_gem.h"
  26#include "msm_mmu.h"
  27#include "a5xx_gpu.h"
  28
  29extern bool hang_debug;
  30static void a5xx_dump(struct msm_gpu *gpu);
  31
  32#define GPU_PAS_ID 13
  33
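/*
 * Load the zap shader MDT image into the reserved memory region described by
 * the "zap-shader" DT node and hand it to the secure world (SCM) for
 * authentication, so the CP can later be switched out of secure mode.
 */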
  34static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
  35{
  36        struct device *dev = &gpu->pdev->dev;
  37        const struct firmware *fw;
  38        struct device_node *np;
  39        struct resource r;
  40        phys_addr_t mem_phys;
  41        ssize_t mem_size;
  42        void *mem_region = NULL;
  43        int ret;
  44
  45        if (!IS_ENABLED(CONFIG_ARCH_QCOM))
  46                return -EINVAL;
  47
  48        np = of_get_child_by_name(dev->of_node, "zap-shader");
  49        if (!np)
  50                return -ENODEV;
  51
  52        np = of_parse_phandle(np, "memory-region", 0);
  53        if (!np)
  54                return -EINVAL;
  55
  56        ret = of_address_to_resource(np, 0, &r);
  57        if (ret)
  58                return ret;
  59
  60        mem_phys = r.start;
  61        mem_size = resource_size(&r);
  62
  63        /* Request the MDT file for the firmware */
  64        fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
  65        if (IS_ERR(fw)) {
  66                DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
  67                return PTR_ERR(fw);
  68        }
  69
  70        /* Figure out how much memory we need */
  71        mem_size = qcom_mdt_get_size(fw);
  72        if (mem_size < 0) {
  73                ret = mem_size;
  74                goto out;
  75        }
  76
   77        /* Map the reserved memory region to hold the firmware image */
   78        mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
  79        if (!mem_region) {
  80                ret = -ENOMEM;
  81                goto out;
  82        }
  83
  84        /*
  85         * Load the rest of the MDT
  86         *
  87         * Note that we could be dealing with two different paths, since
  88         * with upstream linux-firmware it would be in a qcom/ subdir..
  89         * adreno_request_fw() handles this, but qcom_mdt_load() does
  90         * not.  But since we've already gotten thru adreno_request_fw()
  91         * we know which of the two cases it is:
  92         */
  93        if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
  94                ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
  95                                mem_region, mem_phys, mem_size, NULL);
  96        } else {
  97                char *newname;
  98
  99                newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
 100
 101                ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
 102                                mem_region, mem_phys, mem_size, NULL);
 103                kfree(newname);
 104        }
 105        if (ret)
 106                goto out;
 107
 108        /* Send the image to the secure world */
 109        ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
 110        if (ret)
 111                DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
 112
 113out:
 114        if (mem_region)
 115                memunmap(mem_region);
 116
 117        release_firmware(fw);
 118
 119        return ret;
 120}
 121
 122static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 123{
 124        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 125        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 126        uint32_t wptr;
 127        unsigned long flags;
 128
 129        spin_lock_irqsave(&ring->lock, flags);
 130
 131        /* Copy the shadow to the actual register */
 132        ring->cur = ring->next;
 133
 134        /* Make sure to wrap wptr if we need to */
 135        wptr = get_wptr(ring);
 136
 137        spin_unlock_irqrestore(&ring->lock, flags);
 138
 139        /* Make sure everything is posted before making a decision */
 140        mb();
 141
 142        /* Update HW if this is the current ring and we are not in preempt */
 143        if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
 144                gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 145}
 146
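/*
 * CONFIG_DRM_MSM_GPU_SUDO path: copy the submit's commands directly into the
 * ringbuffer instead of issuing them as indirect buffers, then idle the GPU
 * and retire the submit by hand since the copied commands may not generate a
 * completion event themselves.
 */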
 147static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 148        struct msm_file_private *ctx)
 149{
 150        struct msm_drm_private *priv = gpu->dev->dev_private;
 151        struct msm_ringbuffer *ring = submit->ring;
 152        struct msm_gem_object *obj;
 153        uint32_t *ptr, dwords;
  154        unsigned int i, j;
 155
 156        for (i = 0; i < submit->nr_cmds; i++) {
 157                switch (submit->cmd[i].type) {
 158                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 159                        break;
 160                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 161                        if (priv->lastctx == ctx)
 162                                break;
 163                case MSM_SUBMIT_CMD_BUF:
 164                        /* copy commands into RB: */
 165                        obj = submit->bos[submit->cmd[i].idx].obj;
 166                        dwords = submit->cmd[i].size;
 167
 168                        ptr = msm_gem_get_vaddr(&obj->base);
 169
 170                        /* _get_vaddr() shouldn't fail at this point,
 171                         * since we've already mapped it once in
 172                         * submit_reloc()
 173                         */
 174                        if (WARN_ON(!ptr))
 175                                return;
 176
  177                        for (j = 0; j < dwords; j++) {
 178                                /* normally the OUT_PKTn() would wait
 179                                 * for space for the packet.  But since
 180                                 * we just OUT_RING() the whole thing,
 181                                 * need to call adreno_wait_ring()
 182                                 * ourself:
 183                                 */
 184                                adreno_wait_ring(ring, 1);
  185                                OUT_RING(ring, ptr[j]);
 186                        }
 187
 188                        msm_gem_put_vaddr(&obj->base);
 189
 190                        break;
 191                }
 192        }
 193
 194        a5xx_flush(gpu, ring);
 195        a5xx_preempt_trigger(gpu);
 196
 197        /* we might not necessarily have a cmd from userspace to
 198         * trigger an event to know that submit has completed, so
 199         * do this manually:
 200         */
 201        a5xx_idle(gpu, ring);
 202        ring->memptrs->fence = submit->seqno;
 203        msm_gpu_retire(gpu);
 204}
 205
 206static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 207        struct msm_file_private *ctx)
 208{
 209        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 210        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 211        struct msm_drm_private *priv = gpu->dev->dev_private;
 212        struct msm_ringbuffer *ring = submit->ring;
 213        unsigned int i, ibs = 0;
 214
 215        if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
 216                priv->lastctx = NULL;
 217                a5xx_submit_in_rb(gpu, submit, ctx);
 218                return;
 219        }
 220
 221        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 222        OUT_RING(ring, 0x02);
 223
 224        /* Turn off protected mode to write to special registers */
 225        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 226        OUT_RING(ring, 0);
 227
 228        /* Set the save preemption record for the ring/command */
 229        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 230        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 231        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 232
 233        /* Turn back on protected mode */
 234        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 235        OUT_RING(ring, 1);
 236
 237        /* Enable local preemption for finegrain preemption */
 238        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 239        OUT_RING(ring, 0x02);
 240
 241        /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 242        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 243        OUT_RING(ring, 0x02);
 244
 245        /* Submit the commands */
 246        for (i = 0; i < submit->nr_cmds; i++) {
 247                switch (submit->cmd[i].type) {
 248                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 249                        break;
 250                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 251                        if (priv->lastctx == ctx)
 252                                break;
 253                case MSM_SUBMIT_CMD_BUF:
 254                        OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 255                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 256                        OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 257                        OUT_RING(ring, submit->cmd[i].size);
 258                        ibs++;
 259                        break;
 260                }
 261        }
 262
 263        /*
 264         * Write the render mode to NULL (0) to indicate to the CP that the IBs
 265         * are done rendering - otherwise a lucky preemption would start
 266         * replaying from the last checkpoint
 267         */
 268        OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
 269        OUT_RING(ring, 0);
 270        OUT_RING(ring, 0);
 271        OUT_RING(ring, 0);
 272        OUT_RING(ring, 0);
 273        OUT_RING(ring, 0);
 274
 275        /* Turn off IB level preemptions */
 276        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 277        OUT_RING(ring, 0x01);
 278
 279        /* Write the fence to the scratch register */
 280        OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
 281        OUT_RING(ring, submit->seqno);
 282
 283        /*
 284         * Execute a CACHE_FLUSH_TS event. This will ensure that the
 285         * timestamp is written to the memory and then triggers the interrupt
 286         */
 287        OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 288        OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
 289        OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 290        OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 291        OUT_RING(ring, submit->seqno);
 292
 293        /* Yield the floor on command completion */
 294        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 295        /*
 296         * If dword[2:1] are non zero, they specify an address for the CP to
 297         * write the value of dword[3] to on preemption complete. Write 0 to
 298         * skip the write
 299         */
 300        OUT_RING(ring, 0x00);
 301        OUT_RING(ring, 0x00);
 302        /* Data value - not used if the address above is 0 */
 303        OUT_RING(ring, 0x01);
 304        /* Set bit 0 to trigger an interrupt on preempt complete */
 305        OUT_RING(ring, 0x01);
 306
 307        a5xx_flush(gpu, ring);
 308
 309        /* Check to see if we need to start preemption */
 310        a5xx_preempt_trigger(gpu);
 311}
 312
 313static const struct {
 314        u32 offset;
 315        u32 value;
 316} a5xx_hwcg[] = {
 317        {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 318        {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 319        {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
 320        {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
 321        {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 322        {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
 323        {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
 324        {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
 325        {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 326        {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
 327        {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
 328        {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
 329        {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 330        {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 331        {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 332        {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 333        {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 334        {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
 335        {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
 336        {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
 337        {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 338        {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 339        {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 340        {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 341        {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 342        {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
 343        {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
 344        {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
 345        {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 346        {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 347        {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 348        {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 349        {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 350        {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 351        {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 352        {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 353        {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 354        {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
 355        {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
 356        {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
 357        {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 358        {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 359        {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 360        {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 361        {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 362        {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 363        {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 364        {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 365        {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 366        {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
 367        {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
 368        {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
 369        {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 370        {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 371        {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 372        {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 373        {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
 374        {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 375        {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 376        {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 377        {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 378        {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 379        {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 380        {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
 381        {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
 382        {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
 383        {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 384        {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
 385        {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
 386        {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
 387        {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 388        {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 389        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 390        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
 391        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
 392        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
 393        {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 394        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 395        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
 396        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
 397        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
 398        {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 399        {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 400        {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 401        {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 402        {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 403        {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 404        {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 405        {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 406        {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 407        {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 408        {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 409};
 410
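/*
 * Enable or disable hardware clock gating: program the per-block RBBM clock
 * control/hysteresis/delay registers with their tuned values (or zero them to
 * disable gating) and flip the top level RBBM_CLOCK_CNTL enables to match.
 */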
 411void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 412{
 413        unsigned int i;
 414
 415        for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
 416                gpu_write(gpu, a5xx_hwcg[i].offset,
 417                        state ? a5xx_hwcg[i].value : 0);
 418
 419        gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 420        gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 421}
 422
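/* Issue CP_ME_INIT to finish bringing up the microengine after it is taken out of halt */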
 423static int a5xx_me_init(struct msm_gpu *gpu)
 424{
 425        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 426        struct msm_ringbuffer *ring = gpu->rb[0];
 427
 428        OUT_PKT7(ring, CP_ME_INIT, 8);
 429
 430        OUT_RING(ring, 0x0000002F);
 431
 432        /* Enable multiple hardware contexts */
 433        OUT_RING(ring, 0x00000003);
 434
 435        /* Enable error detection */
 436        OUT_RING(ring, 0x20000000);
 437
 438        /* Don't enable header dump */
 439        OUT_RING(ring, 0x00000000);
 440        OUT_RING(ring, 0x00000000);
 441
 442        /* Specify workarounds for various microcode issues */
 443        if (adreno_is_a530(adreno_gpu)) {
 444                /* Workaround for token end syncs
 445                 * Force a WFI after every direct-render 3D mode draw and every
 446                 * 2D mode 3 draw
 447                 */
 448                OUT_RING(ring, 0x0000000B);
 449        } else {
 450                /* No workarounds enabled */
 451                OUT_RING(ring, 0x00000000);
 452        }
 453
 454        OUT_RING(ring, 0x00000000);
 455        OUT_RING(ring, 0x00000000);
 456
 457        gpu->funcs->flush(gpu, ring);
 458        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 459}
 460
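/*
 * If more than one ring is in use, prime preemption on ring 0: set its
 * preemption save record address, enable local preemption and yield support,
 * and issue an initial CONTEXT_SWITCH_YIELD.
 */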
 461static int a5xx_preempt_start(struct msm_gpu *gpu)
 462{
 463        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 464        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 465        struct msm_ringbuffer *ring = gpu->rb[0];
 466
 467        if (gpu->nr_rings == 1)
 468                return 0;
 469
 470        /* Turn off protected mode to write to special registers */
 471        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 472        OUT_RING(ring, 0);
 473
 474        /* Set the save preemption record for the ring/command */
 475        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 476        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 477        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 478
 479        /* Turn back on protected mode */
 480        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 481        OUT_RING(ring, 1);
 482
 483        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 484        OUT_RING(ring, 0x00);
 485
 486        OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 487        OUT_RING(ring, 0x01);
 488
 489        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 490        OUT_RING(ring, 0x01);
 491
 492        /* Yield the floor on command completion */
 493        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 494        OUT_RING(ring, 0x00);
 495        OUT_RING(ring, 0x00);
 496        OUT_RING(ring, 0x01);
 497        OUT_RING(ring, 0x01);
 498
 499        gpu->funcs->flush(gpu, ring);
 500
 501        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 502}
 503
 504static int a5xx_ucode_init(struct msm_gpu *gpu)
 505{
 506        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 507        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 508        int ret;
 509
 510        if (!a5xx_gpu->pm4_bo) {
 511                a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
 512                        adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
 513
 514                if (IS_ERR(a5xx_gpu->pm4_bo)) {
 515                        ret = PTR_ERR(a5xx_gpu->pm4_bo);
 516                        a5xx_gpu->pm4_bo = NULL;
 517                        dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
 518                                ret);
 519                        return ret;
 520                }
 521        }
 522
 523        if (!a5xx_gpu->pfp_bo) {
 524                a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
 525                        adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
 526
 527                if (IS_ERR(a5xx_gpu->pfp_bo)) {
 528                        ret = PTR_ERR(a5xx_gpu->pfp_bo);
 529                        a5xx_gpu->pfp_bo = NULL;
 530                        dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
 531                                ret);
 532                        return ret;
 533                }
 534        }
 535
 536        gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
 537                REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
 538
 539        gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
 540                REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
 541
 542        return 0;
 543}
 544
 545#define SCM_GPU_ZAP_SHADER_RESUME 0
 546
 547static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 548{
 549        int ret;
 550
 551        ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
 552        if (ret)
 553                DRM_ERROR("%s: zap-shader resume failed: %d\n",
 554                        gpu->name, ret);
 555
 556        return ret;
 557}
 558
 559static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 560{
 561        static bool loaded;
 562        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 563        struct platform_device *pdev = gpu->pdev;
 564        int ret;
 565
 566        /*
 567         * If the zap shader is already loaded into memory we just need to kick
 568         * the remote processor to reinitialize it
 569         */
 570        if (loaded)
 571                return a5xx_zap_shader_resume(gpu);
 572
 573        /* We need SCM to be able to load the firmware */
 574        if (!qcom_scm_is_available()) {
 575                DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
 576                return -EPROBE_DEFER;
 577        }
 578
 579        /* Each GPU has a target specific zap shader firmware name to use */
 580        if (!adreno_gpu->info->zapfw) {
 581                DRM_DEV_ERROR(&pdev->dev,
 582                        "Zap shader firmware file not specified for this target\n");
 583                return -ENODEV;
 584        }
 585
 586        ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
 587
 588        loaded = !ret;
 589
 590        return ret;
 591}
 592
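/* The set of RBBM interrupts that are left unmasked during normal operation */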
 593#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 594          A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 595          A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 596          A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 597          A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 598          A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
 599          A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 600          A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
 601          A5XX_RBBM_INT_0_MASK_CP_SW | \
 602          A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 603          A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 604          A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
 605
 606static int a5xx_hw_init(struct msm_gpu *gpu)
 607{
 608        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 609        int ret;
 610
 611        gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 612
 613        /* Make all blocks contribute to the GPU BUSY perf counter */
 614        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
 615
 616        /* Enable RBBM error reporting bits */
 617        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
 618
 619        if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
 620                /*
 621                 * Mask out the activity signals from RB1-3 to avoid false
 622                 * positives
 623                 */
 624
 625                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
 626                        0xF0000000);
 627                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
 628                        0xFFFFFFFF);
 629                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
 630                        0xFFFFFFFF);
 631                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
 632                        0xFFFFFFFF);
 633                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
 634                        0xFFFFFFFF);
 635                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
 636                        0xFFFFFFFF);
 637                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
 638                        0xFFFFFFFF);
 639                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
 640                        0xFFFFFFFF);
 641        }
 642
 643        /* Enable fault detection */
 644        gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
 645                (1 << 30) | 0xFFFF);
 646
 647        /* Turn on performance counters */
 648        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
 649
 650        /* Select CP0 to always count cycles */
 651        gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
 652
 653        /* Select RBBM0 to countable 6 to get the busy status for devfreq */
 654        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
 655
 656        /* Increase VFD cache access so LRZ and other data gets evicted less */
 657        gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 658
 659        /* Disable L2 bypass in the UCHE */
 660        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
 661        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
 662        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
 663        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
 664
  665        /* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem - 1) */
 666        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 667        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
 668        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
 669                0x00100000 + adreno_gpu->gmem - 1);
 670        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 671
 672        gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
 673        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
 674        gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
 675        gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
 676
 677        gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
 678
 679        if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 680                gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 681
 682        gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
 683
 684        /* Enable USE_RETENTION_FLOPS */
 685        gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
 686
 687        /* Enable ME/PFP split notification */
 688        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 689
 690        /* Enable HWCG */
 691        a5xx_set_hwcg(gpu, true);
 692
 693        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 694
 695        /* Set the highest bank bit */
 696        gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
 697        gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
 698
 699        /* Protect registers from the CP */
 700        gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
 701
 702        /* RBBM */
 703        gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
 704        gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
 705        gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
 706        gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
 707        gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
 708        gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
 709
 710        /* Content protect */
 711        gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
 712                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 713                        16));
 714        gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
 715                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
 716
 717        /* CP */
 718        gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
 719        gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
 720        gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
 721        gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
 722
 723        /* RB */
 724        gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
 725        gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
 726
 727        /* VPC */
 728        gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
 729        gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
 730
 731        /* UCHE */
 732        gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 733
 734        if (adreno_is_a530(adreno_gpu))
 735                gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 736                        ADRENO_PROTECT_RW(0x10000, 0x8000));
 737
 738        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
 739        /*
  740         * Disable the trusted memory range - we don't actually support secure
 741         * memory rendering at this point in time and we don't want to block off
 742         * part of the virtual memory space.
 743         */
 744        gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 745                REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 746        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 747
 748        ret = adreno_hw_init(gpu);
 749        if (ret)
 750                return ret;
 751
 752        a5xx_preempt_hw_init(gpu);
 753
 754        a5xx_gpmu_ucode_init(gpu);
 755
 756        ret = a5xx_ucode_init(gpu);
 757        if (ret)
 758                return ret;
 759
  760        /* Interrupts were masked during initial bringup; enable them now */
 761        gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
 762
 763        /* Clear ME_HALT to start the micro engine */
 764        gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
 765        ret = a5xx_me_init(gpu);
 766        if (ret)
 767                return ret;
 768
 769        ret = a5xx_power_init(gpu);
 770        if (ret)
 771                return ret;
 772
 773        /*
 774         * Send a pipeline event stat to get misbehaving counters to start
 775         * ticking correctly
 776         */
 777        if (adreno_is_a530(adreno_gpu)) {
 778                OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
 779                OUT_RING(gpu->rb[0], 0x0F);
 780
 781                gpu->funcs->flush(gpu, gpu->rb[0]);
 782                if (!a5xx_idle(gpu, gpu->rb[0]))
 783                        return -EINVAL;
 784        }
 785
 786        /*
 787         * Try to load a zap shader into the secure world. If successful
 788         * we can use the CP to switch out of secure mode. If not then we
  789         * have no recourse but to try to switch ourselves out manually. If we
 790         * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
 791         * be blocked and a permissions violation will soon follow.
 792         */
 793        ret = a5xx_zap_shader_init(gpu);
 794        if (!ret) {
 795                OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 796                OUT_RING(gpu->rb[0], 0x00000000);
 797
 798                gpu->funcs->flush(gpu, gpu->rb[0]);
 799                if (!a5xx_idle(gpu, gpu->rb[0]))
 800                        return -EINVAL;
 801        } else {
 802                /* Print a warning so if we die, we know why */
 803                dev_warn_once(gpu->dev->dev,
 804                        "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 805                gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
 806        }
 807
 808        /* Last step - yield the ringbuffer */
 809        a5xx_preempt_start(gpu);
 810
 811        return 0;
 812}
 813
 814static void a5xx_recover(struct msm_gpu *gpu)
 815{
 816        int i;
 817
 818        adreno_dump_info(gpu);
 819
 820        for (i = 0; i < 8; i++) {
 821                printk("CP_SCRATCH_REG%d: %u\n", i,
 822                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
 823        }
 824
 825        if (hang_debug)
 826                a5xx_dump(gpu);
 827
 828        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
 829        gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
 830        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
 831        adreno_recover(gpu);
 832}
 833
 834static void a5xx_destroy(struct msm_gpu *gpu)
 835{
 836        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 837        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 838
 839        DBG("%s", gpu->name);
 840
 841        a5xx_preempt_fini(gpu);
 842
 843        if (a5xx_gpu->pm4_bo) {
 844                if (a5xx_gpu->pm4_iova)
 845                        msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
 846                drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
 847        }
 848
 849        if (a5xx_gpu->pfp_bo) {
 850                if (a5xx_gpu->pfp_iova)
 851                        msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
 852                drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
 853        }
 854
 855        if (a5xx_gpu->gpmu_bo) {
 856                if (a5xx_gpu->gpmu_iova)
 857                        msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
 858                drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
 859        }
 860
 861        adreno_gpu_cleanup(adreno_gpu);
 862        kfree(a5xx_gpu);
 863}
 864
 865static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
 866{
 867        if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
 868                return false;
 869
 870        /*
 871         * Nearly every abnormality ends up pausing the GPU and triggering a
 872         * fault so we can safely just watch for this one interrupt to fire
 873         */
 874        return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
 875                A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
 876}
 877
 878bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 879{
 880        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 881        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 882
 883        if (ring != a5xx_gpu->cur_ring) {
 884                WARN(1, "Tried to idle a non-current ringbuffer\n");
 885                return false;
 886        }
 887
 888        /* wait for CP to drain ringbuffer: */
 889        if (!adreno_idle(gpu, ring))
 890                return false;
 891
 892        if (spin_until(_a5xx_check_idle(gpu))) {
 893                DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
 894                        gpu->name, __builtin_return_address(0),
 895                        gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 896                        gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
 897                        gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 898                        gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
 899                return false;
 900        }
 901
 902        return true;
 903}
 904
 905static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
 906{
 907        struct msm_gpu *gpu = arg;
 908        pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
 909                        iova, flags,
 910                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
 911                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
 912                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
 913                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
 914
 915        return -EFAULT;
 916}
 917
 918static void a5xx_cp_err_irq(struct msm_gpu *gpu)
 919{
 920        u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
 921
 922        if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
 923                u32 val;
 924
 925                gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
 926
 927                /*
 928                 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
 929                 * read it twice
 930                 */
 931
 932                gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 933                val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 934
 935                dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
 936                        val);
 937        }
 938
 939        if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
 940                dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
 941                        gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
 942
 943        if (status & A5XX_CP_INT_CP_DMA_ERROR)
 944                dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
 945
 946        if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
 947                u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
 948
 949                dev_err_ratelimited(gpu->dev->dev,
 950                        "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
 951                        val & (1 << 24) ? "WRITE" : "READ",
 952                        (val & 0xFFFFF) >> 2, val);
 953        }
 954
 955        if (status & A5XX_CP_INT_CP_AHB_ERROR) {
 956                u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
 957                const char *access[16] = { "reserved", "reserved",
 958                        "timestamp lo", "timestamp hi", "pfp read", "pfp write",
 959                        "", "", "me read", "me write", "", "", "crashdump read",
 960                        "crashdump write" };
 961
 962                dev_err_ratelimited(gpu->dev->dev,
 963                        "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
 964                        status & 0xFFFFF, access[(status >> 24) & 0xF],
 965                        (status & (1 << 31)), status);
 966        }
 967}
 968
 969static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
 970{
 971        if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
 972                u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
 973
 974                dev_err_ratelimited(gpu->dev->dev,
 975                        "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
 976                        val & (1 << 28) ? "WRITE" : "READ",
 977                        (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
 978                        (val >> 24) & 0xF);
 979
 980                /* Clear the error */
 981                gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
 982
 983                /* Clear the interrupt */
 984                gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 985                        A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 986        }
 987
 988        if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
 989                dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
 990
 991        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
 992                dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
 993                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
 994
 995        if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
 996                dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
 997                        gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
 998
 999        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1000                dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1001                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1002
1003        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1004                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1005
1006        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1007                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1008}
1009
1010static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1011{
 1012        uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI) << 32;
1013
1014        addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1015
1016        dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1017                addr);
1018}
1019
1020static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1021{
1022        dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1023}
1024
1025static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1026{
1027        struct drm_device *dev = gpu->dev;
1028        struct msm_drm_private *priv = dev->dev_private;
1029        struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1030
1031        dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1032                ring ? ring->id : -1, ring ? ring->seqno : 0,
1033                gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1034                gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1035                gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1036                gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1037                gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1038                gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1039                gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1040
1041        /* Turn off the hangcheck timer to keep it from bothering us */
1042        del_timer(&gpu->hangcheck_timer);
1043
1044        queue_work(priv->wq, &gpu->recover_work);
1045}
1046
1047#define RBBM_ERROR_MASK \
1048        (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1049        A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1050        A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1051        A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1052        A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1053        A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1054
1055static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1056{
1057        u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1058
1059        /*
1060         * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1061         * before the source is cleared the interrupt will storm.
1062         */
1063        gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1064                status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1065
1066        /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1067        if (status & RBBM_ERROR_MASK)
1068                a5xx_rbbm_err_irq(gpu, status);
1069
1070        if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1071                a5xx_cp_err_irq(gpu);
1072
1073        if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1074                a5xx_fault_detect_irq(gpu);
1075
1076        if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1077                a5xx_uche_err_irq(gpu);
1078
1079        if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1080                a5xx_gpmu_err_irq(gpu);
1081
1082        if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1083                a5xx_preempt_trigger(gpu);
1084                msm_gpu_retire(gpu);
1085        }
1086
1087        if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1088                a5xx_preempt_irq(gpu);
1089
1090        return IRQ_HANDLED;
1091}
1092
1093static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1094        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1095        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1096        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1097        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1098                REG_A5XX_CP_RB_RPTR_ADDR_HI),
1099        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1100        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1101        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1102};
1103
1104static const u32 a5xx_registers[] = {
1105        0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1106        0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1107        0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1108        0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1109        0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1110        0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1111        0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1112        0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1113        0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1114        0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1115        0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1116        0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1117        0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1118        0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1119        0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1120        0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1121        0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1122        0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1123        0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1124        0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1125        0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1126        0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1127        0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1128        0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1129        0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1130        0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1131        0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1132        0xAC60, 0xAC60, ~0,
1133};
1134
1135static void a5xx_dump(struct msm_gpu *gpu)
1136{
1137        dev_info(gpu->dev->dev, "status:   %08x\n",
1138                gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1139        adreno_dump(gpu);
1140}
1141
1142static int a5xx_pm_resume(struct msm_gpu *gpu)
1143{
1144        int ret;
1145
1146        /* Turn on the core power */
1147        ret = msm_gpu_pm_resume(gpu);
1148        if (ret)
1149                return ret;
1150
 1151        /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1152        gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1153
1154        /* Wait 3 usecs before polling */
1155        udelay(3);
1156
1157        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1158                (1 << 20), (1 << 20));
1159        if (ret) {
1160                DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1161                        gpu->name,
1162                        gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1163                return ret;
1164        }
1165
1166        /* Turn on the SP domain */
1167        gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1168        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1169                (1 << 20), (1 << 20));
1170        if (ret)
1171                DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1172                        gpu->name);
1173
1174        return ret;
1175}
1176
1177static int a5xx_pm_suspend(struct msm_gpu *gpu)
1178{
1179        /* Clear the VBIF pipe before shutting down */
1180        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1181        spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1182
1183        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1184
1185        /*
 1186         * Reset the VBIF before power collapse to avoid issues with FIFO
1187         * entries
1188         */
1189        gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1190        gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1191
1192        return msm_gpu_pm_suspend(gpu);
1193}
1194
1195static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1196{
1197        *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1198                REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1199
1200        return 0;
1201}
1202
1203struct a5xx_crashdumper {
1204        void *ptr;
1205        struct drm_gem_object *bo;
1206        u64 iova;
1207};
1208
1209struct a5xx_gpu_state {
1210        struct msm_gpu_state base;
1211        u32 *hlsqregs;
1212};
1213
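/* Poll a GPU register (dword offset) until @cond is true or @timeout usecs elapse */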
1214#define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
1215        readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
1216                interval, timeout)
1217
1218static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1219                struct a5xx_crashdumper *dumper)
1220{
1221        dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1222                SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1223                &dumper->bo, &dumper->iova);
1224
1225        if (IS_ERR(dumper->ptr))
1226                return PTR_ERR(dumper->ptr);
1227
1228        return 0;
1229}
1230
1231static void a5xx_crashdumper_free(struct msm_gpu *gpu,
1232                struct a5xx_crashdumper *dumper)
1233{
1234        msm_gem_put_iova(dumper->bo, gpu->aspace);
1235        msm_gem_put_vaddr(dumper->bo);
1236
1237        drm_gem_object_unreference(dumper->bo);
1238}
1239
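/*
 * Kick the CP crash dump engine: point it at the script in the dumper BO and
 * poll CRASH_DUMP_CNTL until the completion bit (bit 2) is set.
 */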
1240static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1241                struct a5xx_crashdumper *dumper)
1242{
1243        u32 val;
1244
1245        if (IS_ERR_OR_NULL(dumper->ptr))
1246                return -EINVAL;
1247
1248        gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1249                REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1250
1251        gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1252
1253        return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1254                val & 0x04, 100, 10000);
1255}
1256
1257/*
 1258 * This is a list of the registers that need to be read through the HLSQ
 1259 * aperture via the crashdumper.  These are not nominally accessible from
1260 * the CPU on a secure platform.
1261 */
1262static const struct {
1263        u32 type;
1264        u32 regoffset;
1265        u32 count;
1266} a5xx_hlsq_aperture_regs[] = {
 1267        { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1268        { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1269        { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1270        { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1271        { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1272        { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1273        { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1274        { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1275        { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1276        { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1277        { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1278        { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1279        { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1280        { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1281        { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1282};
1283
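/*
 * Capture the HLSQ/SP/TP aperture registers with the crashdumper.  The dump
 * script is a list of u64 pairs: the first word holds either a value to write
 * (when bit 21 of the second word is set) or the target IOVA to copy into,
 * and the second word packs the register offset in its upper bits along with
 * the dword count.  Each entry below selects a bank via HLSQ_DBG_READ_SEL and
 * then copies that bank out of HLSQ_DBG_AHB_READ_APERTURE into the buffer.
 */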
1284static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1285                struct a5xx_gpu_state *a5xx_state)
1286{
1287        struct a5xx_crashdumper dumper = { 0 };
1288        u32 offset, count = 0;
1289        u64 *ptr;
1290        int i;
1291
1292        if (a5xx_crashdumper_init(gpu, &dumper))
1293                return;
1294
1295        /* The script will be written at offset 0 */
1296        ptr = dumper.ptr;
1297
1298        /* Start writing the data at offset 256k */
1299        offset = dumper.iova + (256 * SZ_1K);
1300
1301        /* Count how many additional registers to get from the HLSQ aperture */
1302        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1303                count += a5xx_hlsq_aperture_regs[i].count;
1304
1305        a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1306        if (!a5xx_state->hlsqregs)
1307                return;
1308
1309        /* Build the crashdump script */
1310        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1311                u32 type = a5xx_hlsq_aperture_regs[i].type;
1312                u32 c = a5xx_hlsq_aperture_regs[i].count;
1313
1314                /* Write the register to select the desired bank */
1315                *ptr++ = ((u64) type << 8);
1316                *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1317                        (1 << 21) | 1;
1318
1319                *ptr++ = offset;
1320                *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1321                        | c;
1322
1323                offset += c * sizeof(u32);
1324        }
1325
1326        /* Write two zeros to close off the script */
1327        *ptr++ = 0;
1328        *ptr++ = 0;
1329
1330        if (a5xx_crashdumper_run(gpu, &dumper)) {
1331                kfree(a5xx_state->hlsqregs);
1332                a5xx_crashdumper_free(gpu, &dumper);
1333                return;
1334        }
1335
1336        /* Copy the data from the crashdumper to the state */
1337        memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1338                count * sizeof(u32));
1339
1340        a5xx_crashdumper_free(gpu, &dumper);
1341}
1342
1343static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1344{
1345        struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1346                        GFP_KERNEL);
1347
1348        if (!a5xx_state)
1349                return ERR_PTR(-ENOMEM);
1350
1351        /* Temporarily disable hardware clock gating before reading the hw */
1352        a5xx_set_hwcg(gpu, false);
1353
1354        /* First get the generic state from the adreno core */
1355        adreno_gpu_state_get(gpu, &(a5xx_state->base));
1356
1357        a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1358
1359        /* Get the HLSQ regs with the help of the crashdumper */
1360        a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1361
1362        a5xx_set_hwcg(gpu, true);
1363
1364        return &a5xx_state->base;
1365}
1366
1367static void a5xx_gpu_state_destroy(struct kref *kref)
1368{
1369        struct msm_gpu_state *state = container_of(kref,
1370                struct msm_gpu_state, ref);
1371        struct a5xx_gpu_state *a5xx_state = container_of(state,
1372                struct a5xx_gpu_state, base);
1373
1374        kfree(a5xx_state->hlsqregs);
1375
1376        adreno_gpu_state_destroy(state);
1377        kfree(a5xx_state);
1378}
1379
1380int a5xx_gpu_state_put(struct msm_gpu_state *state)
1381{
1382        if (IS_ERR_OR_NULL(state))
1383                return 1;
1384
1385        return kref_put(&state->ref, a5xx_gpu_state_destroy);
1386}
1387
1388
1389#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1390void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1391                struct drm_printer *p)
1392{
1393        int i, j;
1394        u32 pos = 0;
1395        struct a5xx_gpu_state *a5xx_state = container_of(state,
1396                struct a5xx_gpu_state, base);
1397
1398        if (IS_ERR_OR_NULL(state))
1399                return;
1400
1401        adreno_show(gpu, state, p);
1402
1403        /* Dump the additional a5xx HLSQ registers */
1404        if (!a5xx_state->hlsqregs)
1405                return;
1406
1407        drm_printf(p, "registers-hlsq:\n");
1408
1409        for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1410                u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1411                u32 c = a5xx_hlsq_aperture_regs[i].count;
1412
1413                for (j = 0; j < c; j++, pos++, o++) {
1414                        /*
1415                         * To keep the crashdump simple we pull the entire range
1416                         * for each register type but not all of the registers
1417                         * in the range are valid. Fortunately invalid registers
1418                         * stick out like a sore thumb with a value of
1419                         * 0xdeadbeef
1420                         */
1421                        if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1422                                continue;
1423
1424                        drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1425                                o << 2, a5xx_state->hlsqregs[pos]);
1426                }
1427        }
1428}
1429#endif
1430
1431static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1432{
1433        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1434        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1435
1436        return a5xx_gpu->cur_ring;
1437}
1438
1439static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
1440{
1441        *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1442                REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1443
1444        return 0;
1445}
1446
1447static const struct adreno_gpu_funcs funcs = {
1448        .base = {
1449                .get_param = adreno_get_param,
1450                .hw_init = a5xx_hw_init,
1451                .pm_suspend = a5xx_pm_suspend,
1452                .pm_resume = a5xx_pm_resume,
1453                .recover = a5xx_recover,
1454                .submit = a5xx_submit,
1455                .flush = a5xx_flush,
1456                .active_ring = a5xx_active_ring,
1457                .irq = a5xx_irq,
1458                .destroy = a5xx_destroy,
1459#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1460                .show = a5xx_show,
1461#endif
1462#if defined(CONFIG_DEBUG_FS)
1463                .debugfs_init = a5xx_debugfs_init,
1464#endif
1465                .gpu_busy = a5xx_gpu_busy,
1466                .gpu_state_get = a5xx_gpu_state_get,
1467                .gpu_state_put = a5xx_gpu_state_put,
1468        },
1469        .get_timestamp = a5xx_get_timestamp,
1470};
1471
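/*
 * Read the fused speed bin through nvmem (if the DT describes one) and tell
 * the OPP layer which entries in the frequency table this part supports.
 */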
1472static void check_speed_bin(struct device *dev)
1473{
1474        struct nvmem_cell *cell;
1475        u32 bin, val;
1476
1477        cell = nvmem_cell_get(dev, "speed_bin");
1478
 1479        /* If an nvmem cell isn't defined, nothing to do */
1480        if (IS_ERR(cell))
1481                return;
1482
1483        bin = *((u32 *) nvmem_cell_read(cell, NULL));
1484        nvmem_cell_put(cell);
1485
1486        val = (1 << bin);
1487
1488        dev_pm_opp_set_supported_hw(dev, &val, 1);
1489}
1490
1491struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1492{
1493        struct msm_drm_private *priv = dev->dev_private;
1494        struct platform_device *pdev = priv->gpu_pdev;
1495        struct a5xx_gpu *a5xx_gpu = NULL;
1496        struct adreno_gpu *adreno_gpu;
1497        struct msm_gpu *gpu;
1498        int ret;
1499
1500        if (!pdev) {
1501                dev_err(dev->dev, "No A5XX device is defined\n");
1502                return ERR_PTR(-ENXIO);
1503        }
1504
1505        a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1506        if (!a5xx_gpu)
1507                return ERR_PTR(-ENOMEM);
1508
1509        adreno_gpu = &a5xx_gpu->base;
1510        gpu = &adreno_gpu->base;
1511
1512        adreno_gpu->registers = a5xx_registers;
1513        adreno_gpu->reg_offsets = a5xx_register_offsets;
1514
1515        a5xx_gpu->lm_leakage = 0x4E001A;
1516
1517        check_speed_bin(&pdev->dev);
1518
1519        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1520        if (ret) {
1521                a5xx_destroy(&(a5xx_gpu->base.base));
1522                return ERR_PTR(ret);
1523        }
1524
1525        if (gpu->aspace)
1526                msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1527
1528        /* Set up the preemption specific bits and pieces for each ringbuffer */
1529        a5xx_preempt_init(gpu);
1530
1531        return gpu;
1532}
1533