linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

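/*
 * Load the zap shader firmware (an MDT image) into the reserved memory
 * region described by the "zap-shader" DT node and ask the secure world
 * to authenticate it via SCM.
 */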
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
        struct device *dev = &gpu->pdev->dev;
        const struct firmware *fw;
        struct device_node *np;
        struct resource r;
        phys_addr_t mem_phys;
        ssize_t mem_size;
        void *mem_region = NULL;
        int ret;

        if (!IS_ENABLED(CONFIG_ARCH_QCOM))
                return -EINVAL;

        np = of_get_child_by_name(dev->of_node, "zap-shader");
        if (!np)
                return -ENODEV;

        np = of_parse_phandle(np, "memory-region", 0);
        if (!np)
                return -EINVAL;

        ret = of_address_to_resource(np, 0, &r);
        if (ret)
                return ret;

        mem_phys = r.start;
        mem_size = resource_size(&r);

        /* Request the MDT file for the firmware */
        fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
        if (IS_ERR(fw)) {
                DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
                return PTR_ERR(fw);
        }

        /* Figure out how much memory we need */
        mem_size = qcom_mdt_get_size(fw);
        if (mem_size < 0) {
                ret = mem_size;
                goto out;
        }

        /* Allocate memory for the firmware image */
        mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
        if (!mem_region) {
                ret = -ENOMEM;
                goto out;
        }

        /*
         * Load the rest of the MDT
         *
         * Note that we could be dealing with two different paths, since
         * with upstream linux-firmware it would be in a qcom/ subdir.
         * adreno_request_fw() handles this, but qcom_mdt_load() does
         * not.  But since we've already gotten through adreno_request_fw()
         * we know which of the two cases it is:
         */
        if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
                ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
                                mem_region, mem_phys, mem_size, NULL);
        } else {
                char newname[strlen("qcom/") + strlen(fwname) + 1];

                sprintf(newname, "qcom/%s", fwname);

                ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
                                mem_region, mem_phys, mem_size, NULL);
        }
        if (ret)
                goto out;

        /* Send the image to the secure world */
        ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
        if (ret)
                DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

out:
        if (mem_region)
                memunmap(mem_region);

        release_firmware(fw);

        return ret;
}

static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
        uint32_t wptr;
        unsigned long flags;

        spin_lock_irqsave(&ring->lock, flags);

        /* Copy the shadow to the actual register */
        ring->cur = ring->next;

        /* Make sure to wrap wptr if we need to */
        wptr = get_wptr(ring);

        spin_unlock_irqrestore(&ring->lock, flags);

        /* Make sure everything is posted before making a decision */
        mb();

        /* Update HW if this is the current ring and we are not in preempt */
        if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
                gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}

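/*
 * For CONFIG_DRM_MSM_GPU_SUDO submits the userspace command stream is copied
 * directly into the ringbuffer instead of being referenced as an indirect
 * buffer, so the kernel has to wait for ring space itself and retire the
 * submit manually once it is known to have completed.
 */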
static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
        struct msm_file_private *ctx)
{
        struct msm_drm_private *priv = gpu->dev->dev_private;
        struct msm_ringbuffer *ring = submit->ring;
        struct msm_gem_object *obj;
        uint32_t *ptr, dwords;
        unsigned int i, j;

        for (i = 0; i < submit->nr_cmds; i++) {
                switch (submit->cmd[i].type) {
                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
                        break;
                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
                        if (priv->lastctx == ctx)
                                break;
                case MSM_SUBMIT_CMD_BUF:
                        /* copy commands into RB: */
                        obj = submit->bos[submit->cmd[i].idx].obj;
                        dwords = submit->cmd[i].size;

                        ptr = msm_gem_get_vaddr(&obj->base);

                        /* _get_vaddr() shouldn't fail at this point,
                         * since we've already mapped it once in
                         * submit_reloc()
                         */
                        if (WARN_ON(!ptr))
                                return;

                        for (j = 0; j < dwords; j++) {
                                /* normally the OUT_PKTn() would wait
                                 * for space for the packet.  But since
                                 * we just OUT_RING() the whole thing,
                                 * need to call adreno_wait_ring()
                                 * ourself:
                                 */
                                adreno_wait_ring(ring, 1);
                                OUT_RING(ring, ptr[j]);
                        }

                        msm_gem_put_vaddr(&obj->base);

                        break;
                }
        }

        a5xx_flush(gpu, ring);
        a5xx_preempt_trigger(gpu);

        /* we might not necessarily have a cmd from userspace to
         * trigger an event to know that submit has completed, so
         * do this manually:
         */
        a5xx_idle(gpu, ring);
        ring->memptrs->fence = submit->seqno;
        msm_gpu_retire(gpu);
}

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
        struct msm_file_private *ctx)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
        struct msm_drm_private *priv = gpu->dev->dev_private;
        struct msm_ringbuffer *ring = submit->ring;
        unsigned int i, ibs = 0;

        if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
                priv->lastctx = NULL;
                a5xx_submit_in_rb(gpu, submit, ctx);
                return;
        }

        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
        OUT_RING(ring, 0x02);

        /* Turn off protected mode to write to special registers */
        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
        OUT_RING(ring, 0);

        /* Set the save preemption record for the ring/command */
        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

        /* Turn back on protected mode */
        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
        OUT_RING(ring, 1);

        /* Enable local preemption for finegrain preemption */
        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
        OUT_RING(ring, 0x02);

        /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
        OUT_RING(ring, 0x02);

        /* Submit the commands */
        for (i = 0; i < submit->nr_cmds; i++) {
                switch (submit->cmd[i].type) {
                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
                        break;
                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
                        if (priv->lastctx == ctx)
                                break;
                case MSM_SUBMIT_CMD_BUF:
                        OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
                        OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
                        OUT_RING(ring, submit->cmd[i].size);
                        ibs++;
                        break;
                }
        }

        /*
         * Write the render mode to NULL (0) to indicate to the CP that the IBs
         * are done rendering - otherwise a lucky preemption would start
         * replaying from the last checkpoint
         */
        OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
        OUT_RING(ring, 0);
        OUT_RING(ring, 0);
        OUT_RING(ring, 0);
        OUT_RING(ring, 0);
        OUT_RING(ring, 0);

        /* Turn off IB level preemptions */
        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
        OUT_RING(ring, 0x01);

        /* Write the fence to the scratch register */
        OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
        OUT_RING(ring, submit->seqno);

        /*
         * Execute a CACHE_FLUSH_TS event. This will ensure that the
         * timestamp is written to the memory and then triggers the interrupt
         */
        OUT_PKT7(ring, CP_EVENT_WRITE, 4);
        OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
        OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
        OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
        OUT_RING(ring, submit->seqno);

        /* Yield the floor on command completion */
        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
        /*
         * If dword[2:1] are non zero, they specify an address for the CP to
         * write the value of dword[3] to on preemption complete. Write 0 to
         * skip the write
         */
        OUT_RING(ring, 0x00);
        OUT_RING(ring, 0x00);
        /* Data value - not used if the address above is 0 */
        OUT_RING(ring, 0x01);
        /* Set bit 0 to trigger an interrupt on preempt complete */
        OUT_RING(ring, 0x01);

        a5xx_flush(gpu, ring);

        /* Check to see if we need to start preemption */
        a5xx_preempt_trigger(gpu);
}

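/*
 * Per-block clock gating, hysteresis and delay settings.  These register/value
 * pairs are written verbatim by a5xx_set_hwcg() when hardware clock gating is
 * enabled, and zeroed when it is disabled.
 */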
static const struct {
        u32 offset;
        u32 value;
} a5xx_hwcg[] = {
        {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
        {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
        {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
        {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
        {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
        {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
        {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
        {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
        {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
        {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
        {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
        {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
        {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
        {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
        {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
        {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
        {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
        {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
        {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
        {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
        {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
        {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
        {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
        {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
        {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
        {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
        {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
        {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
        {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
        {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
        {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
        {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
        {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
        {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
        {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
        {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
        {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
        {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
        {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
        {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
        {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
        {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
        {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
        {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
        {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
        {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
        {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
        {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
        {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
        {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
        {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
        {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
        {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
        {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
        {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
        {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
        {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
        {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
        {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
        {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
        {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
        {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
        {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
        {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
                gpu_write(gpu, a5xx_hwcg[i].offset,
                        state ? a5xx_hwcg[i].value : 0);

        gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
        gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}

static int a5xx_me_init(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct msm_ringbuffer *ring = gpu->rb[0];

        OUT_PKT7(ring, CP_ME_INIT, 8);

        OUT_RING(ring, 0x0000002F);

        /* Enable multiple hardware contexts */
        OUT_RING(ring, 0x00000003);

        /* Enable error detection */
        OUT_RING(ring, 0x20000000);

        /* Don't enable header dump */
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);

        /* Specify workarounds for various microcode issues */
        if (adreno_is_a530(adreno_gpu)) {
                /* Workaround for token end syncs
                 * Force a WFI after every direct-render 3D mode draw and every
                 * 2D mode 3 draw
                 */
                OUT_RING(ring, 0x0000000B);
        } else {
                /* No workarounds enabled */
                OUT_RING(ring, 0x00000000);
        }

        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);

        gpu->funcs->flush(gpu, ring);
        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
        struct msm_ringbuffer *ring = gpu->rb[0];

        if (gpu->nr_rings == 1)
                return 0;

        /* Turn off protected mode to write to special registers */
        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
        OUT_RING(ring, 0);

        /* Set the save preemption record for the ring/command */
        OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
        OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
        OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

        /* Turn back on protected mode */
        OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
        OUT_RING(ring, 1);

        OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
        OUT_RING(ring, 0x00);

        OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
        OUT_RING(ring, 0x01);

        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
        OUT_RING(ring, 0x01);

        /* Yield the floor on command completion */
        OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
        OUT_RING(ring, 0x00);
        OUT_RING(ring, 0x00);
        OUT_RING(ring, 0x01);
        OUT_RING(ring, 0x01);

        gpu->funcs->flush(gpu, ring);

        return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
        int ret;

        if (!a5xx_gpu->pm4_bo) {
                a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
                        adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

                if (IS_ERR(a5xx_gpu->pm4_bo)) {
                        ret = PTR_ERR(a5xx_gpu->pm4_bo);
                        a5xx_gpu->pm4_bo = NULL;
                        dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
                                ret);
                        return ret;
                }
        }

        if (!a5xx_gpu->pfp_bo) {
                a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
                        adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

                if (IS_ERR(a5xx_gpu->pfp_bo)) {
                        ret = PTR_ERR(a5xx_gpu->pfp_bo);
                        a5xx_gpu->pfp_bo = NULL;
                        dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
                                ret);
                        return ret;
                }
        }

        gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
                REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

        gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
                REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

        return 0;
}

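/*
 * The zap shader only has to be loaded and authenticated once per boot; after
 * that the secure world just needs to be told to resume it, which is what
 * a5xx_zap_shader_resume() does on subsequent hw_init calls.
 */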
#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
        int ret;

        ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
        if (ret)
                DRM_ERROR("%s: zap-shader resume failed: %d\n",
                        gpu->name, ret);

        return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
        static bool loaded;
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct platform_device *pdev = gpu->pdev;
        int ret;

        /*
         * If the zap shader is already loaded into memory we just need to kick
         * the remote processor to reinitialize it
         */
        if (loaded)
                return a5xx_zap_shader_resume(gpu);

        /* We need SCM to be able to load the firmware */
        if (!qcom_scm_is_available()) {
                DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
                return -EPROBE_DEFER;
        }

        /* Each GPU has a target specific zap shader firmware name to use */
        if (!adreno_gpu->info->zapfw) {
                DRM_DEV_ERROR(&pdev->dev,
                        "Zap shader firmware file not specified for this target\n");
                return -ENODEV;
        }

        ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);

        loaded = !ret;

        return ret;
}

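/* The set of RBBM interrupts that get unmasked at hw_init time */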
#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
          A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
          A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
          A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
          A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
          A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
          A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
          A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
          A5XX_RBBM_INT_0_MASK_CP_SW | \
          A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
          A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
          A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

static int a5xx_hw_init(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int ret;

        gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

        /* Make all blocks contribute to the GPU BUSY perf counter */
        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

        /* Enable RBBM error reporting bits */
        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

        if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
                /*
                 * Mask out the activity signals from RB1-3 to avoid false
                 * positives
                 */

                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
                        0xF0000000);
                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
                        0xFFFFFFFF);
                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
                        0xFFFFFFFF);
                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
                        0xFFFFFFFF);
                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
                        0xFFFFFFFF);
                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
                        0xFFFFFFFF);
                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
                        0xFFFFFFFF);
                gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
                        0xFFFFFFFF);
        }

        /* Enable fault detection */
        gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
                (1 << 30) | 0xFFFF);

        /* Turn on performance counters */
        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

        /* Select CP0 to always count cycles */
        gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

        /* Select RBBM0 to countable 6 to get the busy status for devfreq */
        gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

        /* Increase VFD cache access so LRZ and other data gets evicted less */
        gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

        /* Disable L2 bypass in the UCHE */
        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
        gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
        gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

        /* Set the GMEM VA range (0 to gpu->gmem) */
        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
                0x00100000 + adreno_gpu->gmem - 1);
        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

        gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
        gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
        gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

        gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

        if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
                gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

        gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

        /* Enable USE_RETENTION_FLOPS */
        gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

        /* Enable ME/PFP split notification */
        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

        /* Enable HWCG */
        a5xx_set_hwcg(gpu, true);

        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

        /* Set the highest bank bit */
        gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
        gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);

        /* Protect registers from the CP */
        gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

        /* RBBM */
        gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

        /* Content protect */
        gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
                        16));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
                ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

        /* CP */
        gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

        /* RB */
        gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

        /* VPC */
        gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
        gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

        /* UCHE */
        gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

        if (adreno_is_a530(adreno_gpu))
                gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
                        ADRENO_PROTECT_RW(0x10000, 0x8000));

        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
        /*
         * Disable the trusted memory range - we don't actually support secure
         * memory rendering at this point in time and we don't want to block off
         * part of the virtual memory space.
         */
        gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
                REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
        gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

        ret = adreno_hw_init(gpu);
        if (ret)
                return ret;

        a5xx_preempt_hw_init(gpu);

        a5xx_gpmu_ucode_init(gpu);

        ret = a5xx_ucode_init(gpu);
        if (ret)
                return ret;

        /* Disable the interrupts through the initial bringup stage */
        gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

        /* Clear ME_HALT to start the micro engine */
        gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
        ret = a5xx_me_init(gpu);
        if (ret)
                return ret;

        ret = a5xx_power_init(gpu);
        if (ret)
                return ret;

        /*
         * Send a pipeline event stat to get misbehaving counters to start
         * ticking correctly
         */
        if (adreno_is_a530(adreno_gpu)) {
                OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
                OUT_RING(gpu->rb[0], 0x0F);

                gpu->funcs->flush(gpu, gpu->rb[0]);
                if (!a5xx_idle(gpu, gpu->rb[0]))
                        return -EINVAL;
        }

        /*
         * Try to load a zap shader into the secure world. If successful
         * we can use the CP to switch out of secure mode. If not then we
         * have no recourse but to try to switch ourselves out manually. If we
         * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
         * be blocked and a permissions violation will soon follow.
         */
        ret = a5xx_zap_shader_init(gpu);
        if (!ret) {
                OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
                OUT_RING(gpu->rb[0], 0x00000000);

                gpu->funcs->flush(gpu, gpu->rb[0]);
                if (!a5xx_idle(gpu, gpu->rb[0]))
                        return -EINVAL;
        } else {
                /* Print a warning so if we die, we know why */
                dev_warn_once(gpu->dev->dev,
                        "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
                gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
        }

        /* Last step - yield the ringbuffer */
        a5xx_preempt_start(gpu);

        return 0;
}

static void a5xx_recover(struct msm_gpu *gpu)
{
        int i;

        adreno_dump_info(gpu);

        for (i = 0; i < 8; i++) {
                printk("CP_SCRATCH_REG%d: %u\n", i,
                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
        }

        if (hang_debug)
                a5xx_dump(gpu);

        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
        gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
        gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
        adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

        DBG("%s", gpu->name);

        a5xx_preempt_fini(gpu);

        if (a5xx_gpu->pm4_bo) {
                if (a5xx_gpu->pm4_iova)
                        msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
                drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
        }

        if (a5xx_gpu->pfp_bo) {
                if (a5xx_gpu->pfp_iova)
                        msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
                drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
        }

        if (a5xx_gpu->gpmu_bo) {
                if (a5xx_gpu->gpmu_iova)
                        msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
                drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
        }

        adreno_gpu_cleanup(adreno_gpu);
        kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
        if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
                return false;

        /*
         * Nearly every abnormality ends up pausing the GPU and triggering a
         * fault so we can safely just watch for this one interrupt to fire
         */
        return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
                A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

        if (ring != a5xx_gpu->cur_ring) {
                WARN(1, "Tried to idle a non-current ringbuffer\n");
                return false;
        }

        /* wait for CP to drain ringbuffer: */
        if (!adreno_idle(gpu, ring))
                return false;

        if (spin_until(_a5xx_check_idle(gpu))) {
                DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
                        gpu->name, __builtin_return_address(0),
                        gpu_read(gpu, REG_A5XX_RBBM_STATUS),
                        gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
                        gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
                        gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
                return false;
        }

        return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
        struct msm_gpu *gpu = arg;
        pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
                        iova, flags,
                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
                        gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

        return -EFAULT;
}

static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
        u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

        if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
                u32 val;

                gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

                /*
                 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
                 * read it twice
                 */

                gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
                val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

                dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
                        val);
        }

        if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
                dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
                        gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

        if (status & A5XX_CP_INT_CP_DMA_ERROR)
                dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

        if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
                u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

                dev_err_ratelimited(gpu->dev->dev,
                        "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
                        val & (1 << 24) ? "WRITE" : "READ",
                        (val & 0xFFFFF) >> 2, val);
        }

        if (status & A5XX_CP_INT_CP_AHB_ERROR) {
                u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
                const char *access[16] = { "reserved", "reserved",
                        "timestamp lo", "timestamp hi", "pfp read", "pfp write",
                        "", "", "me read", "me write", "", "", "crashdump read",
                        "crashdump write" };

                dev_err_ratelimited(gpu->dev->dev,
                        "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
                        status & 0xFFFFF, access[(status >> 24) & 0xF],
                        (status & (1 << 31)), status);
        }
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
        if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
                u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

                dev_err_ratelimited(gpu->dev->dev,
                        "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
                        val & (1 << 28) ? "WRITE" : "READ",
                        (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
                        (val >> 24) & 0xF);

                /* Clear the error */
                gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

                /* Clear the interrupt */
                gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
                        A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
        }

        if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
                dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
                dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

        if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
                dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
                        gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
                dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
                        gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

        if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
                dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
        uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

        addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

        dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
                addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
        dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
        struct drm_device *dev = gpu->dev;
        struct msm_drm_private *priv = dev->dev_private;
        struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

        dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
                ring ? ring->id : -1, ring ? ring->seqno : 0,
                gpu_read(gpu, REG_A5XX_RBBM_STATUS),
                gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
                gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
                gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
                gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
                gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
                gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

        /* Turn off the hangcheck timer to keep it from bothering us */
        del_timer(&gpu->hangcheck_timer);

        queue_work(priv->wq, &gpu->recover_work);
}

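/* The subset of RBBM interrupts that indicate an error condition and get
 * dispatched to a5xx_rbbm_err_irq()
 */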
#define RBBM_ERROR_MASK \
        (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
        A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
        A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
        A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
        A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
        A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
        u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

        /*
         * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
         * before the source is cleared the interrupt will storm.
         */
        gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
                status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

        /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
        if (status & RBBM_ERROR_MASK)
                a5xx_rbbm_err_irq(gpu, status);

        if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
                a5xx_cp_err_irq(gpu);

        if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
                a5xx_fault_detect_irq(gpu);

        if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
                a5xx_uche_err_irq(gpu);

        if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
                a5xx_gpmu_err_irq(gpu);

        if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
                a5xx_preempt_trigger(gpu);
                msm_gpu_retire(gpu);
        }

        if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
                a5xx_preempt_irq(gpu);

        return IRQ_HANDLED;
}

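/* Map the generic adreno register names to their A5XX offsets for the common
 * adreno code
 */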
static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
                REG_A5XX_CP_RB_RPTR_ADDR_HI),
        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
        REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};

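/* Start/end pairs of register ranges read for debug dumps, terminated with ~0.
 * Hooked up through adreno_gpu->registers in a5xx_gpu_init().
 */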
static const u32 a5xx_registers[] = {
        0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
        0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
        0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
        0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
        0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
        0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
        0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
        0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
        0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
        0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
        0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
        0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
        0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
        0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
        0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
        0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
        0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
        0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
        0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
        0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
        0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
        0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
        0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
        0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
        0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
        0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
        0xB9A0, 0xB9BF, ~0
};

static void a5xx_dump(struct msm_gpu *gpu)
{
        dev_info(gpu->dev->dev, "status:   %08x\n",
                gpu_read(gpu, REG_A5XX_RBBM_STATUS));
        adreno_dump(gpu);
}

static int a5xx_pm_resume(struct msm_gpu *gpu)
{
        int ret;

        /* Turn on the core power */
        ret = msm_gpu_pm_resume(gpu);
        if (ret)
                return ret;

        /* Turn on the RBCCU domain first to limit the chances of voltage droop */
        gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

        /* Wait 3 usecs before polling */
        udelay(3);

        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
                (1 << 20), (1 << 20));
        if (ret) {
                DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
                        gpu->name,
                        gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
                return ret;
        }

        /* Turn on the SP domain */
        gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
        ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
                (1 << 20), (1 << 20));
        if (ret)
                DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
                        gpu->name);

        return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
        /* Clear the VBIF pipe before shutting down */
        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
        spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

        gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

        /*
         * Reset the VBIF before power collapse to avoid issues with FIFO
         * entries
         */
        gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
        gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

        return msm_gpu_pm_suspend(gpu);
}

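/* Return the value of CP perfcounter 0 (selected to PERF_CP_ALWAYS_COUNT at
 * hw_init) as a coarse GPU timestamp
 */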
static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
        *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
                REG_A5XX_RBBM_PERFCTR_CP_0_HI);

        return 0;
}

#ifdef CONFIG_DEBUG_FS
static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
        seq_printf(m, "status:   %08x\n",
                        gpu_read(gpu, REG_A5XX_RBBM_STATUS));

        /*
         * Temporarily disable hardware clock gating before going into
         * adreno_show to avoid issues while reading the registers
         */
        a5xx_set_hwcg(gpu, false);
        adreno_show(gpu, m);
        a5xx_set_hwcg(gpu, true);
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

        return a5xx_gpu->cur_ring;
}

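/* Report RBBM perfcounter 0 (programmed to count busy status at hw_init) for
 * devfreq busy tracking
 */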
static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
{
        *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
                REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

        return 0;
}

static const struct adreno_gpu_funcs funcs = {
        .base = {
                .get_param = adreno_get_param,
                .hw_init = a5xx_hw_init,
                .pm_suspend = a5xx_pm_suspend,
                .pm_resume = a5xx_pm_resume,
                .recover = a5xx_recover,
                .submit = a5xx_submit,
                .flush = a5xx_flush,
                .active_ring = a5xx_active_ring,
                .irq = a5xx_irq,
                .destroy = a5xx_destroy,
#ifdef CONFIG_DEBUG_FS
                .show = a5xx_show,
                .debugfs_init = a5xx_debugfs_init,
#endif
                .gpu_busy = a5xx_gpu_busy,
        },
        .get_timestamp = a5xx_get_timestamp,
};

static void check_speed_bin(struct device *dev)
{
        struct nvmem_cell *cell;
        u32 bin, val;

        cell = nvmem_cell_get(dev, "speed_bin");

        /* If an nvmem cell isn't defined, there is nothing to do */
        if (IS_ERR(cell))
                return;

        bin = *((u32 *) nvmem_cell_read(cell, NULL));
        nvmem_cell_put(cell);

        val = (1 << bin);

        dev_pm_opp_set_supported_hw(dev, &val, 1);
}

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
        struct msm_drm_private *priv = dev->dev_private;
        struct platform_device *pdev = priv->gpu_pdev;
        struct a5xx_gpu *a5xx_gpu = NULL;
        struct adreno_gpu *adreno_gpu;
        struct msm_gpu *gpu;
        int ret;

        if (!pdev) {
                dev_err(dev->dev, "No A5XX device is defined\n");
                return ERR_PTR(-ENXIO);
        }

        a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
        if (!a5xx_gpu)
                return ERR_PTR(-ENOMEM);

        adreno_gpu = &a5xx_gpu->base;
        gpu = &adreno_gpu->base;

        adreno_gpu->registers = a5xx_registers;
        adreno_gpu->reg_offsets = a5xx_register_offsets;

        a5xx_gpu->lm_leakage = 0x4E001A;

        check_speed_bin(&pdev->dev);

        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
        if (ret) {
                a5xx_destroy(&(a5xx_gpu->base.base));
                return ERR_PTR(ret);
        }

        if (gpu->aspace)
                msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

        /* Set up the preemption specific bits and pieces for each ringbuffer */
        a5xx_preempt_init(gpu);

        return gpu;
}