linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
   1/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
   2 *
   3 * This program is free software; you can redistribute it and/or modify
   4 * it under the terms of the GNU General Public License version 2 and
   5 * only version 2 as published by the Free Software Foundation.
   6 *
   7 * This program is distributed in the hope that it will be useful,
   8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
   9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  10 * GNU General Public License for more details.
  11 *
  12 */
  13
  14#include <linux/types.h>
  15#include <linux/cpumask.h>
  16#include <linux/qcom_scm.h>
  17#include <linux/dma-mapping.h>
  18#include <linux/of_address.h>
  19#include <linux/soc/qcom/mdt_loader.h>
  20#include <linux/pm_opp.h>
  21#include <linux/nvmem-consumer.h>
  22#include "msm_gem.h"
  23#include "msm_mmu.h"
  24#include "a5xx_gpu.h"
  25
  26extern bool hang_debug;
  27static void a5xx_dump(struct msm_gpu *gpu);
  28
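/*
 * Peripheral Authentication Service ID that identifies the GPU zap-shader
 * image to the secure world in the SCM calls below.
 */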
  29#define GPU_PAS_ID 13
  30
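/*
 * zap_shader_load_mdt() expects the GPU device tree node to carry a
 * "zap-shader" child whose "memory-region" phandle points at a
 * reserved-memory carveout, roughly like this (node names, labels and
 * addresses are illustrative only):
 *
 *	zap_shader_region: zap-shader@8f200000 {
 *		reg = <0x0 0x8f200000 0x0 0x200000>;
 *		no-map;
 *	};
 *
 *	gpu {
 *		zap-shader {
 *			memory-region = <&zap_shader_region>;
 *		};
 *	};
 */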
  31static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
  32{
  33        struct device *dev = &gpu->pdev->dev;
  34        const struct firmware *fw;
  35        struct device_node *np;
  36        struct resource r;
  37        phys_addr_t mem_phys;
  38        ssize_t mem_size;
  39        void *mem_region = NULL;
  40        int ret;
  41
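        /* The zap shader path depends on qcom SCM; bail out early on non-QCOM builds */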
  42        if (!IS_ENABLED(CONFIG_ARCH_QCOM))
  43                return -EINVAL;
  44
  45        np = of_get_child_by_name(dev->of_node, "zap-shader");
  46        if (!np)
  47                return -ENODEV;
  48
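        /* The zap-shader node points at the reserved-memory region that will hold the loaded image */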
  49        np = of_parse_phandle(np, "memory-region", 0);
  50        if (!np)
  51                return -EINVAL;
  52
  53        ret = of_address_to_resource(np, 0, &r);
  54        if (ret)
  55                return ret;
  56
  57        mem_phys = r.start;
  58        mem_size = resource_size(&r);
  59
  60        /* Request the MDT file for the firmware */
  61        fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
  62        if (IS_ERR(fw)) {
  63                DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
  64                return PTR_ERR(fw);
  65        }
  66
  67        /* Figure out how much memory we need */
  68        mem_size = qcom_mdt_get_size(fw);
  69        if (mem_size < 0) {
  70                ret = mem_size;
  71                goto out;
  72        }
  73
        /* Map the reserved memory region that will hold the firmware image */
        mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
  76        if (!mem_region) {
  77                ret = -ENOMEM;
  78                goto out;
  79        }
  80
        /*
         * Load the rest of the MDT.
         *
         * Note that we could be dealing with two different paths, since
         * with upstream linux-firmware the file would be in a qcom/ subdir.
         * adreno_request_fw() handles this, but qcom_mdt_load() does
         * not. Since we have already gotten through adreno_request_fw()
         * we know which of the two cases it is:
         */
  90        if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
  91                ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
  92                                mem_region, mem_phys, mem_size);
  93        } else {
  94                char newname[strlen("qcom/") + strlen(fwname) + 1];
  95
  96                sprintf(newname, "qcom/%s", fwname);
  97
  98                ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
  99                                mem_region, mem_phys, mem_size);
 100        }
 101        if (ret)
 102                goto out;
 103
 104        /* Send the image to the secure world */
 105        ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
 106        if (ret)
 107                DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
 108
 109out:
 110        if (mem_region)
 111                memunmap(mem_region);
 112
 113        release_firmware(fw);
 114
 115        return ret;
 116}
 117
 118static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 119{
 120        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 121        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 122        uint32_t wptr;
 123        unsigned long flags;
 124
 125        spin_lock_irqsave(&ring->lock, flags);
 126
 127        /* Copy the shadow to the actual register */
 128        ring->cur = ring->next;
 129
 130        /* Make sure to wrap wptr if we need to */
 131        wptr = get_wptr(ring);
 132
 133        spin_unlock_irqrestore(&ring->lock, flags);
 134
 135        /* Make sure everything is posted before making a decision */
 136        mb();
 137
 138        /* Update HW if this is the current ring and we are not in preempt */
 139        if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
 140                gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 141}
 142
 143static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 144        struct msm_file_private *ctx)
 145{
 146        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 147        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 148        struct msm_drm_private *priv = gpu->dev->dev_private;
 149        struct msm_ringbuffer *ring = submit->ring;
 150        unsigned int i, ibs = 0;
 151
 152        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 153        OUT_RING(ring, 0x02);
 154
 155        /* Turn off protected mode to write to special registers */
 156        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 157        OUT_RING(ring, 0);
 158
 159        /* Set the save preemption record for the ring/command */
 160        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 161        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 162        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 163
 164        /* Turn back on protected mode */
 165        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 166        OUT_RING(ring, 1);
 167
 168        /* Enable local preemption for finegrain preemption */
        OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 170        OUT_RING(ring, 0x02);
 171
 172        /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 173        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 174        OUT_RING(ring, 0x02);
 175
 176        /* Submit the commands */
 177        for (i = 0; i < submit->nr_cmds; i++) {
 178                switch (submit->cmd[i].type) {
 179                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 180                        break;
 181                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 182                        if (priv->lastctx == ctx)
 183                                break;
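                        /* fall-thru: submit the restore buffer like a normal IB if the context changed */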
 184                case MSM_SUBMIT_CMD_BUF:
 185                        OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 186                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 187                        OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 188                        OUT_RING(ring, submit->cmd[i].size);
 189                        ibs++;
 190                        break;
 191                }
 192        }
 193
 194        /*
 195         * Write the render mode to NULL (0) to indicate to the CP that the IBs
 196         * are done rendering - otherwise a lucky preemption would start
 197         * replaying from the last checkpoint
 198         */
 199        OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
 200        OUT_RING(ring, 0);
 201        OUT_RING(ring, 0);
 202        OUT_RING(ring, 0);
 203        OUT_RING(ring, 0);
 204        OUT_RING(ring, 0);
 205
 206        /* Turn off IB level preemptions */
 207        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 208        OUT_RING(ring, 0x01);
 209
 210        /* Write the fence to the scratch register */
 211        OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
 212        OUT_RING(ring, submit->seqno);
 213
 214        /*
 215         * Execute a CACHE_FLUSH_TS event. This will ensure that the
 216         * timestamp is written to the memory and then triggers the interrupt
 217         */
 218        OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 219        OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
 220        OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 221        OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 222        OUT_RING(ring, submit->seqno);
 223
 224        /* Yield the floor on command completion */
 225        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 226        /*
 227         * If dword[2:1] are non zero, they specify an address for the CP to
 228         * write the value of dword[3] to on preemption complete. Write 0 to
 229         * skip the write
 230         */
 231        OUT_RING(ring, 0x00);
 232        OUT_RING(ring, 0x00);
 233        /* Data value - not used if the address above is 0 */
 234        OUT_RING(ring, 0x01);
 235        /* Set bit 0 to trigger an interrupt on preempt complete */
 236        OUT_RING(ring, 0x01);
 237
 238        a5xx_flush(gpu, ring);
 239
 240        /* Check to see if we need to start preemption */
 241        a5xx_preempt_trigger(gpu);
 242}
 243
 244static const struct {
 245        u32 offset;
 246        u32 value;
 247} a5xx_hwcg[] = {
 248        {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 249        {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 250        {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
 251        {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
 252        {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 253        {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
 254        {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
 255        {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
 256        {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 257        {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
 258        {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
 259        {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
 260        {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 261        {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 262        {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 263        {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 264        {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 265        {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
 266        {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
 267        {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
 268        {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 269        {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 270        {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 271        {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 272        {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 273        {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
 274        {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
 275        {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
 276        {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 277        {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 278        {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 279        {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 280        {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 281        {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 282        {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 283        {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 284        {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 285        {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
 286        {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
 287        {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
 288        {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 289        {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 290        {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 291        {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 292        {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 293        {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 294        {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 295        {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 296        {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 297        {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
 298        {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
 299        {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
 300        {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 301        {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 302        {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 303        {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 304        {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
 305        {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 306        {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 307        {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 308        {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 309        {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 310        {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 311        {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
 312        {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
 313        {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
 314        {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 315        {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
 316        {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
 317        {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
 318        {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 319        {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 320        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 321        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
 322        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
 323        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
 324        {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 325        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 326        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
 327        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
 328        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
 329        {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 330        {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 331        {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 332        {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 333        {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 334        {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 335        {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 336        {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 337        {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 338        {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 339        {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 340};
 341
 342void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 343{
 344        unsigned int i;
 345
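        /* Program the per-block clock control registers, or zero them all to disable HWCG */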
 346        for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
 347                gpu_write(gpu, a5xx_hwcg[i].offset,
 348                        state ? a5xx_hwcg[i].value : 0);
 349
 350        gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 351        gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 352}
 353
 354static int a5xx_me_init(struct msm_gpu *gpu)
 355{
 356        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 357        struct msm_ringbuffer *ring = gpu->rb[0];
 358
 359        OUT_PKT7(ring, CP_ME_INIT, 8);
 360
 361        OUT_RING(ring, 0x0000002F);
 362
 363        /* Enable multiple hardware contexts */
 364        OUT_RING(ring, 0x00000003);
 365
 366        /* Enable error detection */
 367        OUT_RING(ring, 0x20000000);
 368
 369        /* Don't enable header dump */
 370        OUT_RING(ring, 0x00000000);
 371        OUT_RING(ring, 0x00000000);
 372
 373        /* Specify workarounds for various microcode issues */
 374        if (adreno_is_a530(adreno_gpu)) {
 375                /* Workaround for token end syncs
 376                 * Force a WFI after every direct-render 3D mode draw and every
 377                 * 2D mode 3 draw
 378                 */
 379                OUT_RING(ring, 0x0000000B);
 380        } else {
 381                /* No workarounds enabled */
 382                OUT_RING(ring, 0x00000000);
 383        }
 384
 385        OUT_RING(ring, 0x00000000);
 386        OUT_RING(ring, 0x00000000);
 387
 388        gpu->funcs->flush(gpu, ring);
 389        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 390}
 391
 392static int a5xx_preempt_start(struct msm_gpu *gpu)
 393{
 394        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 395        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 396        struct msm_ringbuffer *ring = gpu->rb[0];
 397
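        /* Nothing to set up if preemption is disabled (only one ringbuffer) */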
 398        if (gpu->nr_rings == 1)
 399                return 0;
 400
 401        /* Turn off protected mode to write to special registers */
 402        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 403        OUT_RING(ring, 0);
 404
 405        /* Set the save preemption record for the ring/command */
 406        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 407        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 408        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 409
 410        /* Turn back on protected mode */
 411        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 412        OUT_RING(ring, 1);
 413
 414        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 415        OUT_RING(ring, 0x00);
 416
 417        OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 418        OUT_RING(ring, 0x01);
 419
 420        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 421        OUT_RING(ring, 0x01);
 422
 423        /* Yield the floor on command completion */
 424        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 425        OUT_RING(ring, 0x00);
 426        OUT_RING(ring, 0x00);
 427        OUT_RING(ring, 0x01);
 428        OUT_RING(ring, 0x01);
 429
 430        gpu->funcs->flush(gpu, ring);
 431
 432        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 433}
 434
 435
 436static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
 437                const struct firmware *fw, u64 *iova)
 438{
 439        struct drm_gem_object *bo;
 440        void *ptr;
 441
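        /*
         * The first dword of the firmware image is skipped (it is a header,
         * not microcode), hence the 4 byte offset and size adjustment below.
         */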
 442        ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
 443                MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);
 444
 445        if (IS_ERR(ptr))
 446                return ERR_CAST(ptr);
 447
 448        memcpy(ptr, &fw->data[4], fw->size - 4);
 449
 450        msm_gem_put_vaddr(bo);
 451        return bo;
 452}
 453
 454static int a5xx_ucode_init(struct msm_gpu *gpu)
 455{
 456        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 457        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 458        int ret;
 459
 460        if (!a5xx_gpu->pm4_bo) {
 461                a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pm4,
 462                        &a5xx_gpu->pm4_iova);
 463
 464                if (IS_ERR(a5xx_gpu->pm4_bo)) {
 465                        ret = PTR_ERR(a5xx_gpu->pm4_bo);
 466                        a5xx_gpu->pm4_bo = NULL;
 467                        dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
 468                                ret);
 469                        return ret;
 470                }
 471        }
 472
 473        if (!a5xx_gpu->pfp_bo) {
 474                a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pfp,
 475                        &a5xx_gpu->pfp_iova);
 476
 477                if (IS_ERR(a5xx_gpu->pfp_bo)) {
 478                        ret = PTR_ERR(a5xx_gpu->pfp_bo);
 479                        a5xx_gpu->pfp_bo = NULL;
 480                        dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
 481                                ret);
 482                        return ret;
 483                }
 484        }
 485
 486        gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
 487                REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
 488
 489        gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
 490                REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
 491
 492        return 0;
 493}
 494
 495#define SCM_GPU_ZAP_SHADER_RESUME 0
 496
 497static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 498{
 499        int ret;
 500
 501        ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
 502        if (ret)
 503                DRM_ERROR("%s: zap-shader resume failed: %d\n",
 504                        gpu->name, ret);
 505
 506        return ret;
 507}
 508
 509static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 510{
 511        static bool loaded;
 512        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 513        struct platform_device *pdev = gpu->pdev;
 514        int ret;
 515
 516        /*
 517         * If the zap shader is already loaded into memory we just need to kick
 518         * the remote processor to reinitialize it
 519         */
 520        if (loaded)
 521                return a5xx_zap_shader_resume(gpu);
 522
 523        /* We need SCM to be able to load the firmware */
 524        if (!qcom_scm_is_available()) {
 525                DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
 526                return -EPROBE_DEFER;
 527        }
 528
 529        /* Each GPU has a target specific zap shader firmware name to use */
 530        if (!adreno_gpu->info->zapfw) {
 531                DRM_DEV_ERROR(&pdev->dev,
 532                        "Zap shader firmware file not specified for this target\n");
 533                return -ENODEV;
 534        }
 535
 536        ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
 537
 538        loaded = !ret;
 539
 540        return ret;
 541}
 542
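/*
 * Interrupt sources unmasked at hw_init time: the error and hang conditions
 * plus CP_SW (preemption complete) and CP_CACHE_FLUSH_TS (submission
 * retired), all dispatched from a5xx_irq().
 */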
 543#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 544          A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 545          A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 546          A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 547          A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 548          A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
 549          A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 550          A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
 551          A5XX_RBBM_INT_0_MASK_CP_SW | \
 552          A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 553          A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 554          A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
 555
 556static int a5xx_hw_init(struct msm_gpu *gpu)
 557{
 558        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 559        int ret;
 560
 561        gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 562
 563        /* Make all blocks contribute to the GPU BUSY perf counter */
 564        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
 565
 566        /* Enable RBBM error reporting bits */
 567        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
 568
 569        if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
 570                /*
 571                 * Mask out the activity signals from RB1-3 to avoid false
 572                 * positives
 573                 */
 574
 575                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
 576                        0xF0000000);
 577                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
 578                        0xFFFFFFFF);
 579                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
 580                        0xFFFFFFFF);
 581                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
 582                        0xFFFFFFFF);
 583                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
 584                        0xFFFFFFFF);
 585                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
 586                        0xFFFFFFFF);
 587                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
 588                        0xFFFFFFFF);
 589                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
 590                        0xFFFFFFFF);
 591        }
 592
 593        /* Enable fault detection */
 594        gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
 595                (1 << 30) | 0xFFFF);
 596
 597        /* Turn on performance counters */
 598        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
 599
 600        /* Select CP0 to always count cycles */
 601        gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
 602
        /* Select countable 6 for RBBM perf counter 0 so devfreq can read the GPU busy status */
 604        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
 605
 606        /* Increase VFD cache access so LRZ and other data gets evicted less */
 607        gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 608
 609        /* Disable L2 bypass in the UCHE */
 610        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
 611        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
 612        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
 613        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
 614
 615        /* Set the GMEM VA range (0 to gpu->gmem) */
 616        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 617        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
 618        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
 619                0x00100000 + adreno_gpu->gmem - 1);
 620        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 621
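        /* Set the CP internal queue (MEQ, MERCIU, ROQ) sizes and thresholds */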
 622        gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
 623        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
 624        gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
 625        gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
 626
 627        gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
 628
 629        if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 630                gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 633
 634        /* Enable USE_RETENTION_FLOPS */
 635        gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
 636
 637        /* Enable ME/PFP split notification */
 638        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 639
 640        /* Enable HWCG */
 641        a5xx_set_hwcg(gpu, true);
 642
 643        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 644
 645        /* Set the highest bank bit */
 646        gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
 647        gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
 648
 649        /* Protect registers from the CP */
 650        gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
 651
 652        /* RBBM */
 653        gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
 654        gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
 655        gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
 656        gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
 657        gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
 658        gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
 659
 660        /* Content protect */
 661        gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
 662                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 663                        16));
 664        gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
 665                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
 666
 667        /* CP */
 668        gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
 669        gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
 670        gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
 671        gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
 672
 673        /* RB */
 674        gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
 675        gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
 676
 677        /* VPC */
 678        gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
 679        gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
 680
 681        /* UCHE */
 682        gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 683
 684        if (adreno_is_a530(adreno_gpu))
 685                gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 686                        ADRENO_PROTECT_RW(0x10000, 0x8000));
 687
 688        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
 689        /*
         * Disable the trusted memory range - we don't actually support secure
 691         * memory rendering at this point in time and we don't want to block off
 692         * part of the virtual memory space.
 693         */
 694        gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 695                REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 696        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 697
 698        ret = adreno_hw_init(gpu);
 699        if (ret)
 700                return ret;
 701
 702        a5xx_preempt_hw_init(gpu);
 703
 704        a5xx_gpmu_ucode_init(gpu);
 705
 706        ret = a5xx_ucode_init(gpu);
 707        if (ret)
 708                return ret;
 709
        /* Unmask the RBBM interrupts that a5xx_irq() handles */
 711        gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
 712
 713        /* Clear ME_HALT to start the micro engine */
 714        gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
 715        ret = a5xx_me_init(gpu);
 716        if (ret)
 717                return ret;
 718
 719        ret = a5xx_power_init(gpu);
 720        if (ret)
 721                return ret;
 722
 723        /*
 724         * Send a pipeline event stat to get misbehaving counters to start
 725         * ticking correctly
 726         */
 727        if (adreno_is_a530(adreno_gpu)) {
 728                OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
 729                OUT_RING(gpu->rb[0], 0x0F);
 730
 731                gpu->funcs->flush(gpu, gpu->rb[0]);
 732                if (!a5xx_idle(gpu, gpu->rb[0]))
 733                        return -EINVAL;
 734        }
 735
 736        /*
 737         * Try to load a zap shader into the secure world. If successful
 738         * we can use the CP to switch out of secure mode. If not then we
         * have no recourse but to try to switch ourselves out manually. If we
 740         * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
 741         * be blocked and a permissions violation will soon follow.
 742         */
 743        ret = a5xx_zap_shader_init(gpu);
 744        if (!ret) {
 745                OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 746                OUT_RING(gpu->rb[0], 0x00000000);
 747
 748                gpu->funcs->flush(gpu, gpu->rb[0]);
 749                if (!a5xx_idle(gpu, gpu->rb[0]))
 750                        return -EINVAL;
 751        } else {
 752                /* Print a warning so if we die, we know why */
 753                dev_warn_once(gpu->dev->dev,
 754                        "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 755                gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
 756        }
 757
 758        /* Last step - yield the ringbuffer */
 759        a5xx_preempt_start(gpu);
 760
 761        return 0;
 762}
 763
 764static void a5xx_recover(struct msm_gpu *gpu)
 765{
 766        int i;
 767
 768        adreno_dump_info(gpu);
 769
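        /* Dump the CP scratch registers - a5xx_submit() writes the fence seqno to scratch 2 */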
 770        for (i = 0; i < 8; i++) {
 771                printk("CP_SCRATCH_REG%d: %u\n", i,
 772                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
 773        }
 774
 775        if (hang_debug)
 776                a5xx_dump(gpu);
 777
 778        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
 779        gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
 780        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
 781        adreno_recover(gpu);
 782}
 783
 784static void a5xx_destroy(struct msm_gpu *gpu)
 785{
 786        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 787        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 788
 789        DBG("%s", gpu->name);
 790
 791        a5xx_preempt_fini(gpu);
 792
 793        if (a5xx_gpu->pm4_bo) {
 794                if (a5xx_gpu->pm4_iova)
 795                        msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
 796                drm_gem_object_unreference_unlocked(a5xx_gpu->pm4_bo);
 797        }
 798
 799        if (a5xx_gpu->pfp_bo) {
 800                if (a5xx_gpu->pfp_iova)
 801                        msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
 802                drm_gem_object_unreference_unlocked(a5xx_gpu->pfp_bo);
 803        }
 804
 805        if (a5xx_gpu->gpmu_bo) {
 806                if (a5xx_gpu->gpmu_iova)
 807                        msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
 808                drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
 809        }
 810
 811        adreno_gpu_cleanup(adreno_gpu);
 812        kfree(a5xx_gpu);
 813}
 814
 815static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
 816{
 817        if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
 818                return false;
 819
 820        /*
 821         * Nearly every abnormality ends up pausing the GPU and triggering a
 822         * fault so we can safely just watch for this one interrupt to fire
 823         */
 824        return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
 825                A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
 826}
 827
 828bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 829{
 830        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 831        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 832
 833        if (ring != a5xx_gpu->cur_ring) {
 834                WARN(1, "Tried to idle a non-current ringbuffer\n");
 835                return false;
 836        }
 837
 838        /* wait for CP to drain ringbuffer: */
 839        if (!adreno_idle(gpu, ring))
 840                return false;
 841
 842        if (spin_until(_a5xx_check_idle(gpu))) {
 843                DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
 844                        gpu->name, __builtin_return_address(0),
 845                        gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 846                        gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
 847                        gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 848                        gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
 849                return false;
 850        }
 851
 852        return true;
 853}
 854
 855static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
 856{
 857        struct msm_gpu *gpu = arg;
 858        pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
 859                        iova, flags,
 860                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
 861                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
 862                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
 863                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
 864
 865        return -EFAULT;
 866}
 867
 868static void a5xx_cp_err_irq(struct msm_gpu *gpu)
 869{
 870        u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
 871
 872        if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
 873                u32 val;
 874
 875                gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
 876
 877                /*
 878                 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
 879                 * read it twice
 880                 */
 881
 882                gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 883                val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 884
 885                dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
 886                        val);
 887        }
 888
 889        if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
 890                dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
 891                        gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
 892
 893        if (status & A5XX_CP_INT_CP_DMA_ERROR)
 894                dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
 895
 896        if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
 897                u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
 898
 899                dev_err_ratelimited(gpu->dev->dev,
 900                        "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
 901                        val & (1 << 24) ? "WRITE" : "READ",
 902                        (val & 0xFFFFF) >> 2, val);
 903        }
 904
 905        if (status & A5XX_CP_INT_CP_AHB_ERROR) {
 906                u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
 907                const char *access[16] = { "reserved", "reserved",
 908                        "timestamp lo", "timestamp hi", "pfp read", "pfp write",
 909                        "", "", "me read", "me write", "", "", "crashdump read",
 910                        "crashdump write" };
 911
 912                dev_err_ratelimited(gpu->dev->dev,
 913                        "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
 914                        status & 0xFFFFF, access[(status >> 24) & 0xF],
 915                        (status & (1 << 31)), status);
 916        }
 917}
 918
 919static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
 920{
 921        if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
 922                u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
 923
 924                dev_err_ratelimited(gpu->dev->dev,
 925                        "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
 926                        val & (1 << 28) ? "WRITE" : "READ",
 927                        (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
 928                        (val >> 24) & 0xF);
 929
 930                /* Clear the error */
 931                gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
 932
 933                /* Clear the interrupt */
 934                gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 935                        A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 936        }
 937
 938        if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
 939                dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
 940
 941        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
 942                dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
 943                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
 944
 945        if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
 946                dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
 947                        gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
 948
 949        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
 950                dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
 951                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
 952
 953        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
 954                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
 955
 956        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
 957                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
 958}
 959
 960static void a5xx_uche_err_irq(struct msm_gpu *gpu)
 961{
        uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI) << 32;

        addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
 965
 966        dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
 967                addr);
 968}
 969
 970static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
 971{
 972        dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
 973}
 974
 975static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 976{
 977        struct drm_device *dev = gpu->dev;
 978        struct msm_drm_private *priv = dev->dev_private;
 979        struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 980
 981        dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
 982                ring ? ring->id : -1, ring ? ring->seqno : 0,
 983                gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 984                gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 985                gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
 986                gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
 987                gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
 988                gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
 989                gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
 990
 991        /* Turn off the hangcheck timer to keep it from bothering us */
 992        del_timer(&gpu->hangcheck_timer);
 993
 994        queue_work(priv->wq, &gpu->recover_work);
 995}
 996
 997#define RBBM_ERROR_MASK \
 998        (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 999        A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1000        A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1001        A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1002        A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1003        A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1004
1005static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1006{
1007        u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1008
1009        /*
1010         * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1011         * before the source is cleared the interrupt will storm.
1012         */
1013        gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1014                status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1015
1016        /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1017        if (status & RBBM_ERROR_MASK)
1018                a5xx_rbbm_err_irq(gpu, status);
1019
1020        if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1021                a5xx_cp_err_irq(gpu);
1022
1023        if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1024                a5xx_fault_detect_irq(gpu);
1025
1026        if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1027                a5xx_uche_err_irq(gpu);
1028
1029        if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1030                a5xx_gpmu_err_irq(gpu);
1031
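        /* A cache flush timestamp means a submit finished: retire it and see if a preempt switch is now possible */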
1032        if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1033                a5xx_preempt_trigger(gpu);
1034                msm_gpu_retire(gpu);
1035        }
1036
1037        if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1038                a5xx_preempt_irq(gpu);
1039
1040        return IRQ_HANDLED;
1041}
1042
1043static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1044        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1045        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1046        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1047        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1048                REG_A5XX_CP_RB_RPTR_ADDR_HI),
1049        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1050        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1051        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1052};
1053
1054static const u32 a5xx_registers[] = {
1055        0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1056        0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1057        0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1058        0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1059        0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1060        0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1061        0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1062        0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1063        0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1064        0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1065        0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1066        0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1067        0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1068        0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1069        0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1070        0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1071        0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1072        0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1073        0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1074        0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1075        0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1076        0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1077        0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1078        0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1079        0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1080        0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
1081        0xB9A0, 0xB9BF, ~0
1082};
1083
1084static void a5xx_dump(struct msm_gpu *gpu)
1085{
1086        dev_info(gpu->dev->dev, "status:   %08x\n",
1087                gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1088        adreno_dump(gpu);
1089}
1090
1091static int a5xx_pm_resume(struct msm_gpu *gpu)
1092{
1093        int ret;
1094
1095        /* Turn on the core power */
1096        ret = msm_gpu_pm_resume(gpu);
1097        if (ret)
1098                return ret;
1099
        /* Turn on the RBCCU power domain first to limit the chances of voltage droop */
1101        gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1102
1103        /* Wait 3 usecs before polling */
1104        udelay(3);
1105
1106        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1107                (1 << 20), (1 << 20));
1108        if (ret) {
1109                DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1110                        gpu->name,
1111                        gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1112                return ret;
1113        }
1114
1115        /* Turn on the SP domain */
1116        gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1117        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1118                (1 << 20), (1 << 20));
1119        if (ret)
1120                DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1121                        gpu->name);
1122
1123        return ret;
1124}
1125
1126static int a5xx_pm_suspend(struct msm_gpu *gpu)
1127{
1128        /* Clear the VBIF pipe before shutting down */
1129        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1130        spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1131
1132        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1133
1134        /*
1135         * Reset the VBIF before power collapse to avoid issue with FIFO
1136         * entries
1137         */
1138        gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1139        gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1140
1141        return msm_gpu_pm_suspend(gpu);
1142}
1143
1144static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1145{
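        /* CP counter 0 is set to PERF_CP_ALWAYS_COUNT in a5xx_hw_init(), so it serves as a free-running timestamp */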
1146        *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1147                REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1148
1149        return 0;
1150}
1151
1152#ifdef CONFIG_DEBUG_FS
1153static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
1154{
1155        seq_printf(m, "status:   %08x\n",
1156                        gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1157
1158        /*
1159         * Temporarily disable hardware clock gating before going into
1160         * adreno_show to avoid issues while reading the registers
1161         */
1162        a5xx_set_hwcg(gpu, false);
1163        adreno_show(gpu, m);
1164        a5xx_set_hwcg(gpu, true);
1165}
1166#endif
1167
1168static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1169{
1170        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1171        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1172
1173        return a5xx_gpu->cur_ring;
1174}
1175
1176static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
1177{
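        /* RBBM counter 0 is programmed with countable 6 (busy cycles) in a5xx_hw_init() */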
1178        *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1179                REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1180
1181        return 0;
1182}
1183
1184static const struct adreno_gpu_funcs funcs = {
1185        .base = {
1186                .get_param = adreno_get_param,
1187                .hw_init = a5xx_hw_init,
1188                .pm_suspend = a5xx_pm_suspend,
1189                .pm_resume = a5xx_pm_resume,
1190                .recover = a5xx_recover,
1191                .submit = a5xx_submit,
1192                .flush = a5xx_flush,
1193                .active_ring = a5xx_active_ring,
1194                .irq = a5xx_irq,
1195                .destroy = a5xx_destroy,
1196#ifdef CONFIG_DEBUG_FS
1197                .show = a5xx_show,
1198#endif
1199                .gpu_busy = a5xx_gpu_busy,
1200        },
1201        .get_timestamp = a5xx_get_timestamp,
1202};
1203
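/*
 * The GPU may be fused with a "speed bin" that limits which operating points
 * it is allowed to run at.  The nvmem cell name used below ("speed_bin") has
 * to match an nvmem-cell-names entry in the GPU device tree node.
 */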
1204static void check_speed_bin(struct device *dev)
1205{
1206        struct nvmem_cell *cell;
1207        u32 bin, val;
1208
1209        cell = nvmem_cell_get(dev, "speed_bin");
1210
        /* If an nvmem cell isn't defined, there is nothing to do */
1212        if (IS_ERR(cell))
1213                return;
1214
1215        bin = *((u32 *) nvmem_cell_read(cell, NULL));
1216        nvmem_cell_put(cell);
1217
1218        val = (1 << bin);
1219
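        /* Only OPP table entries whose opp-supported-hw mask includes this bin bit remain enabled */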
1220        dev_pm_opp_set_supported_hw(dev, &val, 1);
1221}
1222
1223struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1224{
1225        struct msm_drm_private *priv = dev->dev_private;
1226        struct platform_device *pdev = priv->gpu_pdev;
1227        struct a5xx_gpu *a5xx_gpu = NULL;
1228        struct adreno_gpu *adreno_gpu;
1229        struct msm_gpu *gpu;
1230        int ret;
1231
1232        if (!pdev) {
1233                dev_err(dev->dev, "No A5XX device is defined\n");
1234                return ERR_PTR(-ENXIO);
1235        }
1236
1237        a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1238        if (!a5xx_gpu)
1239                return ERR_PTR(-ENOMEM);
1240
1241        adreno_gpu = &a5xx_gpu->base;
1242        gpu = &adreno_gpu->base;
1243
1244        adreno_gpu->registers = a5xx_registers;
1245        adreno_gpu->reg_offsets = a5xx_register_offsets;
1246
1247        a5xx_gpu->lm_leakage = 0x4E001A;
1248
1249        check_speed_bin(&pdev->dev);
1250
1251        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1252        if (ret) {
1253                a5xx_destroy(&(a5xx_gpu->base.base));
1254                return ERR_PTR(ret);
1255        }
1256
1257        if (gpu->aspace)
1258                msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1259
1260        /* Set up the preemption specific bits and pieces for each ringbuffer */
1261        a5xx_preempt_init(gpu);
1262
1263        return gpu;
1264}
1265