linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

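/*
 * Firmware images requested per ASIC.  MODULE_FIRMWARE() only records the
 * dependency (e.g. for initramfs tooling); the actual loading is done by the
 * microcode init helpers further down via request_firmware().
 */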
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

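/*
 * GFX sub-block indices as understood by the RAS TA (trusted application).
 * The *_INDEX_START/*_INDEX_END values bracket contiguous ranges so whole
 * groups of sub-blocks (e.g. all SQC banks) can be addressed by range.
 */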
enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

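/*
 * Build one ras_gfx_subblocks[] entry: map the driver-side
 * AMDGPU_RAS_BLOCK__* index to the TA index above, with flags a-d packed
 * into hw_supported_error_type and e-h packed into sw_supported_error_type.
 */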
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                 \
	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
		#subblock,                                                     \
		TA_RAS_BLOCK__##subblock,                                      \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

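/*
 * "Golden" register settings, applied once during init through
 * soc15_program_register_sequence().  Each entry is (register, mask of bits
 * to update, value), i.e. a read-modify-write limited to the masked bits.
 * gfx_v9_0_init_golden_registers() selects the per-ASIC tables and, for most
 * ASICs, also applies the common gc_9_x table.
 */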
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

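/*
 * Offsets of the eight RLC SRM (save/restore machine) index control
 * address/data register pairs, relative to instance 0, so they can be
 * programmed in a loop.
 */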
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

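/* Program the per-ASIC golden register sequences defined above. */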
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need the common golden settings */
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

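/* Expose eight SCRATCH_REG* registers to the scratch allocator used by the ring test. */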
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

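/*
 * Emit a PM4 WRITE_DATA packet that writes @val to register @reg.
 * @eng_sel selects the CP engine doing the write and @wc requests a
 * write confirmation.
 */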
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

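/*
 * Emit a PM4 WAIT_REG_MEM packet: poll a register (mem_space == 0) or a
 * memory location (mem_space == 1) until (value & mask) == ref, polling
 * every @inv interval.
 */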
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

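/*
 * Basic ring test: push a SET_UCONFIG_REG packet that writes 0xDEADBEEF
 * into a scratch register and poll until the value shows up.
 */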
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

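/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback (WB) slot, wait on the fence, then check the slot.
 */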
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

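/*
 * Parse the v2.1 RLC firmware header and record the save/restore list
 * CNTL/GPM/SRM blobs (versions, sizes and offsets into the image).
 */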
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

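/*
 * Check whether the loaded CP ME/PFP and MEC firmware is new enough to
 * handle the 1-cycle GRBM delay itself; set the me/mec_fw_write_wait
 * flags accordingly and warn about firmware that is too old.
 */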
 971static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
 972{
 973        adev->gfx.me_fw_write_wait = false;
 974        adev->gfx.mec_fw_write_wait = false;
 975
 976        if ((adev->gfx.mec_fw_version < 0x000001a5) ||
 977            (adev->gfx.mec_feature_version < 46) ||
 978            (adev->gfx.pfp_fw_version < 0x000000b7) ||
 979            (adev->gfx.pfp_feature_version < 46))
 980                DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
 981                              GRBM requires 1-cycle delay in cp firmware\n");
 982
 983        switch (adev->asic_type) {
 984        case CHIP_VEGA10:
 985                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
 986                    (adev->gfx.me_feature_version >= 42) &&
 987                    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
 988                    (adev->gfx.pfp_feature_version >= 42))
 989                        adev->gfx.me_fw_write_wait = true;
 990
 991                if ((adev->gfx.mec_fw_version >=  0x00000193) &&
 992                    (adev->gfx.mec_feature_version >= 42))
 993                        adev->gfx.mec_fw_write_wait = true;
 994                break;
 995        case CHIP_VEGA12:
 996                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
 997                    (adev->gfx.me_feature_version >= 44) &&
 998                    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
 999                    (adev->gfx.pfp_feature_version >= 44))
1000                        adev->gfx.me_fw_write_wait = true;
1001
1002                if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1003                    (adev->gfx.mec_feature_version >= 44))
1004                        adev->gfx.mec_fw_write_wait = true;
1005                break;
1006        case CHIP_VEGA20:
1007                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1008                    (adev->gfx.me_feature_version >= 44) &&
1009                    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1010                    (adev->gfx.pfp_feature_version >= 44))
1011                        adev->gfx.me_fw_write_wait = true;
1012
1013                if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1014                    (adev->gfx.mec_feature_version >= 44))
1015                        adev->gfx.mec_fw_write_wait = true;
1016                break;
1017        case CHIP_RAVEN:
1018                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1019                    (adev->gfx.me_feature_version >= 42) &&
1020                    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1021                    (adev->gfx.pfp_feature_version >= 42))
1022                        adev->gfx.me_fw_write_wait = true;
1023
1024                if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1025                    (adev->gfx.mec_feature_version >= 42))
1026                        adev->gfx.mec_fw_write_wait = true;
1027                break;
1028        default:
1029                break;
1030        }
1031}
1032
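/*
 * Decide whether GFXOFF can stay enabled for this ASIC/firmware combination
 * and, where it can, advertise the matching powergating flags.
 */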
1033static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1034{
1035        switch (adev->asic_type) {
1036        case CHIP_VEGA10:
1037        case CHIP_VEGA12:
1038        case CHIP_VEGA20:
1039                break;
1040        case CHIP_RAVEN:
1041                /* Disable GFXOFF on original raven.  There are combinations
1042                 * of sbios and platforms that are not stable.
1043                 */
                if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
                        adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1053
1054                if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1055                        adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1056                                AMD_PG_SUPPORT_CP |
1057                                AMD_PG_SUPPORT_RLC_SMU_HS;
1058                break;
1059        case CHIP_RENOIR:
1060                if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1061                        adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1062                                AMD_PG_SUPPORT_CP |
1063                                AMD_PG_SUPPORT_RLC_SMU_HS;
1064                break;
1065        default:
1066                break;
1067        }
1068}
1069
1070static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1071                                          const char *chip_name)
1072{
1073        char fw_name[30];
1074        int err;
1075        struct amdgpu_firmware_info *info = NULL;
1076        const struct common_firmware_header *header = NULL;
1077        const struct gfx_firmware_header_v1_0 *cp_hdr;
1078
1079        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1080        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1081        if (err)
1082                goto out;
1083        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1084        if (err)
1085                goto out;
1086        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1087        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1088        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1089
1090        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1091        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1092        if (err)
1093                goto out;
1094        err = amdgpu_ucode_validate(adev->gfx.me_fw);
1095        if (err)
1096                goto out;
1097        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1098        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1099        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1100
1101        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1102        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1103        if (err)
1104                goto out;
1105        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1106        if (err)
1107                goto out;
1108        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1109        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1110        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1111
1112        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1113                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1114                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1115                info->fw = adev->gfx.pfp_fw;
1116                header = (const struct common_firmware_header *)info->fw->data;
1117                adev->firmware.fw_size +=
1118                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1119
1120                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1121                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1122                info->fw = adev->gfx.me_fw;
1123                header = (const struct common_firmware_header *)info->fw->data;
1124                adev->firmware.fw_size +=
1125                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1126
1127                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1128                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1129                info->fw = adev->gfx.ce_fw;
1130                header = (const struct common_firmware_header *)info->fw->data;
1131                adev->firmware.fw_size +=
1132                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1133        }
1134
1135out:
1136        if (err) {
1137                dev_err(adev->dev,
1138                        "gfx9: Failed to load firmware \"%s\"\n",
1139                        fw_name);
1140                release_firmware(adev->gfx.pfp_fw);
1141                adev->gfx.pfp_fw = NULL;
1142                release_firmware(adev->gfx.me_fw);
1143                adev->gfx.me_fw = NULL;
1144                release_firmware(adev->gfx.ce_fw);
1145                adev->gfx.ce_fw = NULL;
1146        }
1147        return err;
1148}
1149
1150static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1151                                          const char *chip_name)
1152{
1153        char fw_name[30];
1154        int err;
1155        struct amdgpu_firmware_info *info = NULL;
1156        const struct common_firmware_header *header = NULL;
1157        const struct rlc_firmware_header_v2_0 *rlc_hdr;
1158        unsigned int *tmp = NULL;
1159        unsigned int i = 0;
1160        uint16_t version_major;
1161        uint16_t version_minor;
1162        uint32_t smu_version;
1163
        /*
         * For Picasso on an AM4 socket board we use picasso_rlc_am4.bin
         * instead of picasso_rlc.bin.
         * Detection: PCO AM4 has PCI revision 0xC8..0xCF or 0xD8..0xDF,
         * anything else is PCO FP5.
         */
1172        if (!strcmp(chip_name, "picasso") &&
1173                (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1174                ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1175                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1176        else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1177                (smu_version >= 0x41e2b))
                /*
                 * SMC is loaded by SBIOS on APUs, so the SMU version can be
                 * queried directly.
                 */
1181                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1182        else
1183                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1184        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1185        if (err)
1186                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
1188        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1189
1190        version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1191        version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1192        if (version_major == 2 && version_minor == 1)
1193                adev->gfx.rlc.is_rlc_v2_1 = true;
1194
1195        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1196        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1197        adev->gfx.rlc.save_and_restore_offset =
1198                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
1199        adev->gfx.rlc.clear_state_descriptor_offset =
1200                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1201        adev->gfx.rlc.avail_scratch_ram_locations =
1202                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1203        adev->gfx.rlc.reg_restore_list_size =
1204                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
1205        adev->gfx.rlc.reg_list_format_start =
1206                        le32_to_cpu(rlc_hdr->reg_list_format_start);
1207        adev->gfx.rlc.reg_list_format_separate_start =
1208                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1209        adev->gfx.rlc.starting_offsets_start =
1210                        le32_to_cpu(rlc_hdr->starting_offsets_start);
1211        adev->gfx.rlc.reg_list_format_size_bytes =
1212                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1213        adev->gfx.rlc.reg_list_size_bytes =
1214                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1215        adev->gfx.rlc.register_list_format =
1216                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1217                                adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1218        if (!adev->gfx.rlc.register_list_format) {
1219                err = -ENOMEM;
1220                goto out;
1221        }
1222
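        /*
         * The buffer allocated above holds the register list format array
         * immediately followed by the register restore list; copy both out
         * of the RLC image, converting from little endian.
         */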
1223        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1224                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1226                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1227
1228        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1229
1230        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1231                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1233                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1234
1235        if (adev->gfx.rlc.is_rlc_v2_1)
1236                gfx_v9_0_init_rlc_ext_microcode(adev);
1237
1238        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1239                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1240                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1241                info->fw = adev->gfx.rlc_fw;
1242                header = (const struct common_firmware_header *)info->fw->data;
1243                adev->firmware.fw_size +=
1244                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1245
1246                if (adev->gfx.rlc.is_rlc_v2_1 &&
1247                    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1248                    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1249                    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1250                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1251                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1252                        info->fw = adev->gfx.rlc_fw;
1253                        adev->firmware.fw_size +=
1254                                ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1255
1256                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1257                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1258                        info->fw = adev->gfx.rlc_fw;
1259                        adev->firmware.fw_size +=
1260                                ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1261
1262                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1263                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1264                        info->fw = adev->gfx.rlc_fw;
1265                        adev->firmware.fw_size +=
1266                                ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1267                }
1268        }
1269
1270out:
1271        if (err) {
1272                dev_err(adev->dev,
1273                        "gfx9: Failed to load firmware \"%s\"\n",
1274                        fw_name);
1275                release_firmware(adev->gfx.rlc_fw);
1276                adev->gfx.rlc_fw = NULL;
1277        }
1278        return err;
1279}
1280
1281static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1282                                          const char *chip_name)
1283{
1284        char fw_name[30];
1285        int err;
1286        struct amdgpu_firmware_info *info = NULL;
1287        const struct common_firmware_header *header = NULL;
1288        const struct gfx_firmware_header_v1_0 *cp_hdr;
1289
1290        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1291        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1292        if (err)
1293                goto out;
1294        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1295        if (err)
1296                goto out;
1297        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1298        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1299        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1300
1302        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1303        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1304        if (!err) {
1305                err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1306                if (err)
1307                        goto out;
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                        adev->gfx.mec2_fw->data;
                adev->gfx.mec2_fw_version =
                        le32_to_cpu(cp_hdr->header.ucode_version);
                adev->gfx.mec2_feature_version =
                        le32_to_cpu(cp_hdr->ucode_feature_version);
1314        } else {
1315                err = 0;
1316                adev->gfx.mec2_fw = NULL;
1317        }
1318
1319        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1320                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1321                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1322                info->fw = adev->gfx.mec_fw;
1323                header = (const struct common_firmware_header *)info->fw->data;
1324                cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1325                adev->firmware.fw_size +=
1326                        ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1327
1328                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1329                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1330                info->fw = adev->gfx.mec_fw;
1331                adev->firmware.fw_size +=
1332                        ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1333
1334                if (adev->gfx.mec2_fw) {
1335                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1336                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1337                        info->fw = adev->gfx.mec2_fw;
1338                        header = (const struct common_firmware_header *)info->fw->data;
1339                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1340                        adev->firmware.fw_size +=
1341                                ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1342
                        /*
                         * TODO: Determine if MEC2 JT FW loading can be
                         * removed for all GFX V9 asics and above.
                         */
1345                        if (adev->asic_type != CHIP_ARCTURUS) {
1346                                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1347                                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1348                                info->fw = adev->gfx.mec2_fw;
1349                                adev->firmware.fw_size +=
1350                                        ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1351                                        PAGE_SIZE);
1352                        }
1353                }
1354        }
1355
1356out:
1357        gfx_v9_0_check_if_need_gfxoff(adev);
1358        gfx_v9_0_check_fw_write_wait(adev);
1359        if (err) {
1360                dev_err(adev->dev,
1361                        "gfx9: Failed to load firmware \"%s\"\n",
1362                        fw_name);
1363                release_firmware(adev->gfx.mec_fw);
1364                adev->gfx.mec_fw = NULL;
1365                release_firmware(adev->gfx.mec2_fw);
1366                adev->gfx.mec2_fw = NULL;
1367        }
1368        return err;
1369}
1370
1371static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1372{
1373        const char *chip_name;
1374        int r;
1375
1376        DRM_DEBUG("\n");
1377
1378        switch (adev->asic_type) {
1379        case CHIP_VEGA10:
1380                chip_name = "vega10";
1381                break;
1382        case CHIP_VEGA12:
1383                chip_name = "vega12";
1384                break;
1385        case CHIP_VEGA20:
1386                chip_name = "vega20";
1387                break;
1388        case CHIP_RAVEN:
1389                if (adev->rev_id >= 8)
1390                        chip_name = "raven2";
1391                else if (adev->pdev->device == 0x15d8)
1392                        chip_name = "picasso";
1393                else
1394                        chip_name = "raven";
1395                break;
1396        case CHIP_ARCTURUS:
1397                chip_name = "arcturus";
1398                break;
1399        case CHIP_RENOIR:
1400                chip_name = "renoir";
1401                break;
1402        default:
1403                BUG();
1404        }
1405
1406        /* No CPG in Arcturus */
1407        if (adev->asic_type != CHIP_ARCTURUS) {
1408                r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1409                if (r)
1410                        return r;
1411        }
1412
1413        r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1414        if (r)
1415                return r;
1416
1417        r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1418        if (r)
1419                return r;
1420
1421        return r;
1422}
1423
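/*
 * Worst-case dword count of the clear-state buffer: the PREAMBLE and
 * CONTEXT_CONTROL packets up front, a SET_CONTEXT_REG header pair plus the
 * register payload per extent, and the trailing PREAMBLE/CLEAR_STATE pair.
 */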
1424static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1425{
1426        u32 count = 0;
1427        const struct cs_section_def *sect = NULL;
1428        const struct cs_extent_def *ext = NULL;
1429
1430        /* begin clear state */
1431        count += 2;
1432        /* context control state */
1433        count += 3;
1434
1435        for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1436                for (ext = sect->section; ext->extent != NULL; ++ext) {
1437                        if (sect->id == SECT_CONTEXT)
1438                                count += 2 + ext->reg_count;
1439                        else
1440                                return 0;
1441                }
1442        }
1443
1444        /* end clear state */
1445        count += 2;
1446        /* clear state */
1447        count += 2;
1448
1449        return count;
1450}
1451
1452static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1453                                    volatile u32 *buffer)
1454{
1455        u32 count = 0, i;
1456        const struct cs_section_def *sect = NULL;
1457        const struct cs_extent_def *ext = NULL;
1458
1459        if (adev->gfx.rlc.cs_data == NULL)
1460                return;
1461        if (buffer == NULL)
1462                return;
1463
1464        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1465        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1466
1467        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1468        buffer[count++] = cpu_to_le32(0x80000000);
1469        buffer[count++] = cpu_to_le32(0x80000000);
1470
1471        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1472                for (ext = sect->section; ext->extent != NULL; ++ext) {
1473                        if (sect->id == SECT_CONTEXT) {
1474                                buffer[count++] =
1475                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1476                                buffer[count++] = cpu_to_le32(ext->reg_index -
1477                                                PACKET3_SET_CONTEXT_REG_START);
1478                                for (i = 0; i < ext->reg_count; i++)
1479                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
1480                        } else {
1481                                return;
1482                        }
1483                }
1484        }
1485
1486        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1487        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1488
1489        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1490        buffer[count++] = cpu_to_le32(0);
1491}
1492
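/*
 * Build a per-SE/SH bitmap of the first N active CUs and program it as the
 * always-on CU mask for the RLC; N is 4 on APUs, 8 on Vega12 and 12
 * otherwise, with the first 2 CUs also written to RLC_PG_ALWAYS_ON_CU_MASK.
 */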
1493static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1494{
1495        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1496        uint32_t pg_always_on_cu_num = 2;
1497        uint32_t always_on_cu_num;
1498        uint32_t i, j, k;
1499        uint32_t mask, cu_bitmap, counter;
1500
1501        if (adev->flags & AMD_IS_APU)
1502                always_on_cu_num = 4;
1503        else if (adev->asic_type == CHIP_VEGA12)
1504                always_on_cu_num = 8;
1505        else
1506                always_on_cu_num = 12;
1507
1508        mutex_lock(&adev->grbm_idx_mutex);
1509        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1510                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1511                        mask = 1;
1512                        cu_bitmap = 0;
1513                        counter = 0;
1514                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1515
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1517                                if (cu_info->bitmap[i][j] & mask) {
1518                                        if (counter == pg_always_on_cu_num)
1519                                                WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1520                                        if (counter < always_on_cu_num)
1521                                                cu_bitmap |= mask;
1522                                        else
1523                                                break;
1524                                        counter++;
1525                                }
1526                                mask <<= 1;
1527                        }
1528
1529                        WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1530                        cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1531                }
1532        }
1533        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1534        mutex_unlock(&adev->grbm_idx_mutex);
1535}
1536
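/*
 * Program the RLC load-balancing (LBPW) thresholds, counters and CU masks
 * used on Raven.
 */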
1537static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1538{
1539        uint32_t data;
1540
1541        /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1542        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1543        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1544        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1545        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1546
1547        /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1548        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1549
1550        /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1551        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1552
1553        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1555        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1556        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1557
1558        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1559        data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1560        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1561        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1562        WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1563
1564        /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1565        data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1566        data &= 0x0000FFFF;
1567        data |= 0x00C00000;
1568        WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1569
1570        /*
1571         * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1572         * programmed in gfx_v9_0_init_always_on_cu_mask()
1573         */
1574
        /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
         * but is used here as part of the RLC_LB_CNTL configuration */
1577        data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1578        data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1579        data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1580        WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1581        mutex_unlock(&adev->grbm_idx_mutex);
1582
1583        gfx_v9_0_init_always_on_cu_mask(adev);
1584}
1585
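/* Same flow as gfx_v9_0_init_lbpw() with thresholds retuned for Vega20. */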
1586static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1587{
1588        uint32_t data;
1589
1590        /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1591        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1592        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1593        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1594        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1595
1596        /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1597        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1598
        /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1600        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1601
1602        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1604        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1605        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1606
1607        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1608        data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1609        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1610        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1611        WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1612
1613        /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1614        data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1615        data &= 0x0000FFFF;
1616        data |= 0x00C00000;
1617        WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1618
1619        /*
1620         * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1621         * programmed in gfx_v9_0_init_always_on_cu_mask()
1622         */
1623
        /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
         * but is used here as part of the RLC_LB_CNTL configuration */
1626        data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1627        data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1628        data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1629        WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1630        mutex_unlock(&adev->grbm_idx_mutex);
1631
1632        gfx_v9_0_init_always_on_cu_mask(adev);
1633}
1634
1635static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1636{
1637        WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1638}
1639
1640static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1641{
1642        return 5;
1643}
1644
1645static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1646{
1647        const struct cs_section_def *cs_data;
1648        int r;
1649
1650        adev->gfx.rlc.cs_data = gfx9_cs_data;
1651
1652        cs_data = adev->gfx.rlc.cs_data;
1653
1654        if (cs_data) {
1655                /* init clear state block */
1656                r = amdgpu_gfx_rlc_init_csb(adev);
1657                if (r)
1658                        return r;
1659        }
1660
1661        if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1662                /* TODO: double check the cp_table_size for RV */
1663                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1664                r = amdgpu_gfx_rlc_init_cpt(adev);
1665                if (r)
1666                        return r;
1667        }
1668
1669        switch (adev->asic_type) {
1670        case CHIP_RAVEN:
1671                gfx_v9_0_init_lbpw(adev);
1672                break;
1673        case CHIP_VEGA20:
1674                gfx_v9_4_init_lbpw(adev);
1675                break;
1676        default:
1677                break;
1678        }
1679
1680        return 0;
1681}
1682
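/* Pin the clear-state buffer into VRAM and record its GPU address. */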
1683static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1684{
1685        int r;
1686
1687        r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1688        if (unlikely(r != 0))
1689                return r;
1690
1691        r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1692                        AMDGPU_GEM_DOMAIN_VRAM);
1693        if (!r)
1694                adev->gfx.rlc.clear_state_gpu_addr =
1695                        amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1696
1697        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1698
1699        return r;
1700}
1701
1702static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1703{
1704        int r;
1705
1706        if (!adev->gfx.rlc.clear_state_obj)
1707                return;
1708
1709        r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1710        if (likely(r == 0)) {
1711                amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1712                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1713        }
1714}
1715
1716static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1717{
1718        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1719        amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1720}
1721
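/*
 * Allocate the compute HPD/EOP buffer in VRAM (one GFX9_MEC_HPD_SIZE slot
 * per acquired queue) and stage a copy of the MEC ucode in a GTT buffer
 * object.
 */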
1722static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1723{
1724        int r;
1725        u32 *hpd;
1726        const __le32 *fw_data;
1727        unsigned fw_size;
1728        u32 *fw;
1729        size_t mec_hpd_size;
1730
1731        const struct gfx_firmware_header_v1_0 *mec_hdr;
1732
1733        bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1734
1735        /* take ownership of the relevant compute queues */
1736        amdgpu_gfx_compute_queue_acquire(adev);
1737        mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1738
1739        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1740                                      AMDGPU_GEM_DOMAIN_VRAM,
1741                                      &adev->gfx.mec.hpd_eop_obj,
1742                                      &adev->gfx.mec.hpd_eop_gpu_addr,
1743                                      (void **)&hpd);
1744        if (r) {
                dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1746                gfx_v9_0_mec_fini(adev);
1747                return r;
1748        }
1749
1750        memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1751
1752        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1753        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1754
1755        mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1756
1757        fw_data = (const __le32 *)
1758                (adev->gfx.mec_fw->data +
1759                 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1761
1762        r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1763                                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1764                                      &adev->gfx.mec.mec_fw_obj,
1765                                      &adev->gfx.mec.mec_fw_gpu_addr,
1766                                      (void **)&fw);
1767        if (r) {
1768                dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1769                gfx_v9_0_mec_fini(adev);
1770                return r;
1771        }
1772
1773        memcpy(fw, fw_data, fw_size);
1774
1775        amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1776        amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1777
1778        return 0;
1779}
1780
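/*
 * Wave state is read through the SQ indirect interface: select the wave,
 * SIMD and register via SQ_IND_INDEX, then read the value back from
 * SQ_IND_DATA (with optional auto-increment for bulk register reads).
 */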
1781static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1782{
1783        WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1784                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1785                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1786                (address << SQ_IND_INDEX__INDEX__SHIFT) |
1787                (SQ_IND_INDEX__FORCE_READ_MASK));
1788        return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1789}
1790
1791static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1792                           uint32_t wave, uint32_t thread,
1793                           uint32_t regno, uint32_t num, uint32_t *out)
1794{
1795        WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1796                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1797                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1798                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1799                (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1800                (SQ_IND_INDEX__FORCE_READ_MASK) |
1801                (SQ_IND_INDEX__AUTO_INCR_MASK));
1802        while (num--)
1803                *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1804}
1805
1806static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1807{
1808        /* type 1 wave data */
1809        dst[(*no_fields)++] = 1;
1810        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1811        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1812        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1813        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1814        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1815        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1816        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1817        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1818        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1819        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1820        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1821        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1822        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1823        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1824}
1825
1826static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1827                                     uint32_t wave, uint32_t start,
1828                                     uint32_t size, uint32_t *dst)
1829{
1830        wave_read_regs(
1831                adev, simd, wave, 0,
1832                start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1833}
1834
1835static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1836                                     uint32_t wave, uint32_t thread,
1837                                     uint32_t start, uint32_t size,
1838                                     uint32_t *dst)
1839{
1840        wave_read_regs(
1841                adev, simd, wave, thread,
1842                start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1843}
1844
1845static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1846                                  u32 me, u32 pipe, u32 q, u32 vm)
1847{
1848        soc15_grbm_select(adev, me, pipe, q, vm);
1849}
1850
1851static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1852        .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1853        .select_se_sh = &gfx_v9_0_select_se_sh,
1854        .read_wave_data = &gfx_v9_0_read_wave_data,
1855        .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1856        .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1857        .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1858        .ras_error_inject = &gfx_v9_0_ras_error_inject,
1859        .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1860};
1861
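/*
 * Fill in the per-ASIC gfx configuration: FIFO sizes, the golden
 * GB_ADDR_CONFIG value and the decoded GB_ADDR_CONFIG fields.
 */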
1862static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1863{
1864        u32 gb_addr_config;
1865        int err;
1866
1867        adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1868
1869        switch (adev->asic_type) {
1870        case CHIP_VEGA10:
1871                adev->gfx.config.max_hw_contexts = 8;
1872                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1873                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1874                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1875                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1876                gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1877                break;
1878        case CHIP_VEGA12:
1879                adev->gfx.config.max_hw_contexts = 8;
1880                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1881                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1882                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1883                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1884                gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1885                DRM_INFO("fix gfx.config for vega12\n");
1886                break;
1887        case CHIP_VEGA20:
1888                adev->gfx.config.max_hw_contexts = 8;
1889                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1890                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1891                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1892                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1893                gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1894                gb_addr_config &= ~0xf3e777ff;
1895                gb_addr_config |= 0x22014042;
1896                /* check vbios table if gpu info is not available */
1897                err = amdgpu_atomfirmware_get_gfx_info(adev);
1898                if (err)
1899                        return err;
1900                break;
1901        case CHIP_RAVEN:
1902                adev->gfx.config.max_hw_contexts = 8;
1903                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1904                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1905                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1906                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1907                if (adev->rev_id >= 8)
1908                        gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1909                else
1910                        gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1911                break;
1912        case CHIP_ARCTURUS:
1913                adev->gfx.config.max_hw_contexts = 8;
1914                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1915                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1916                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1917                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1918                gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1919                gb_addr_config &= ~0xf3e777ff;
1920                gb_addr_config |= 0x22014042;
1921                break;
1922        case CHIP_RENOIR:
1923                adev->gfx.config.max_hw_contexts = 8;
1924                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1925                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1926                adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1927                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1928                gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1929                gb_addr_config &= ~0xf3e777ff;
1930                gb_addr_config |= 0x22010042;
1931                break;
1932        default:
1933                BUG();
1934                break;
1935        }
1936
1937        adev->gfx.config.gb_addr_config = gb_addr_config;
1938
1939        adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1940                        REG_GET_FIELD(
1941                                        adev->gfx.config.gb_addr_config,
1942                                        GB_ADDR_CONFIG,
1943                                        NUM_PIPES);
1944
1945        adev->gfx.config.max_tile_pipes =
1946                adev->gfx.config.gb_addr_config_fields.num_pipes;
1947
1948        adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1949                        REG_GET_FIELD(
1950                                        adev->gfx.config.gb_addr_config,
1951                                        GB_ADDR_CONFIG,
1952                                        NUM_BANKS);
1953        adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1954                        REG_GET_FIELD(
1955                                        adev->gfx.config.gb_addr_config,
1956                                        GB_ADDR_CONFIG,
1957                                        MAX_COMPRESSED_FRAGS);
1958        adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1959                        REG_GET_FIELD(
1960                                        adev->gfx.config.gb_addr_config,
1961                                        GB_ADDR_CONFIG,
1962                                        NUM_RB_PER_SE);
1963        adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1964                        REG_GET_FIELD(
1965                                        adev->gfx.config.gb_addr_config,
1966                                        GB_ADDR_CONFIG,
1967                                        NUM_SHADER_ENGINES);
1968        adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1969                        REG_GET_FIELD(
1970                                        adev->gfx.config.gb_addr_config,
1971                                        GB_ADDR_CONFIG,
1972                                        PIPE_INTERLEAVE_SIZE));
1973
1974        return 0;
1975}
1976
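/*
 * Create one NGG buffer sized per shader engine; a size of zero selects
 * the default and negative sizes are rejected.
 */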
1977static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1978                                   struct amdgpu_ngg_buf *ngg_buf,
1979                                   int size_se,
1980                                   int default_size_se)
1981{
1982        int r;
1983
1984        if (size_se < 0) {
1985                dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1986                return -EINVAL;
1987        }
1988        size_se = size_se ? size_se : default_size_se;
1989
1990        ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1991        r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1992                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1993                                    &ngg_buf->bo,
1994                                    &ngg_buf->gpu_addr,
1995                                    NULL);
1996        if (r) {
1997                dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1998                return r;
1999        }
2000        ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
2001
2002        return r;
2003}
2004
2005static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
2006{
2007        int i;
2008
2009        for (i = 0; i < NGG_BUF_MAX; i++)
2010                amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
2011                                      &adev->gfx.ngg.buf[i].gpu_addr,
2012                                      NULL);
2013
2014        memset(&adev->gfx.ngg.buf[0], 0,
2015                        sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
2016
2017        adev->gfx.ngg.init = false;
2018
2019        return 0;
2020}
2021
2022static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2023{
2024        int r;
2025
        if (!amdgpu_ngg || adev->gfx.ngg.init)
2027                return 0;
2028
2029        /* GDS reserve memory: 64 bytes alignment */
2030        adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2031        adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2032        adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2033        adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2034
2035        /* Primitive Buffer */
2036        r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2037                                    amdgpu_prim_buf_per_se,
2038                                    64 * 1024);
2039        if (r) {
2040                dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2041                goto err;
2042        }
2043
2044        /* Position Buffer */
2045        r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2046                                    amdgpu_pos_buf_per_se,
2047                                    256 * 1024);
2048        if (r) {
2049                dev_err(adev->dev, "Failed to create Position Buffer\n");
2050                goto err;
2051        }
2052
2053        /* Control Sideband */
2054        r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2055                                    amdgpu_cntl_sb_buf_per_se,
2056                                    256);
2057        if (r) {
2058                dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2059                goto err;
2060        }
2061
2062        /* Parameter Cache, not created by default */
2063        if (amdgpu_param_buf_per_se <= 0)
2064                goto out;
2065
2066        r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2067                                    amdgpu_param_buf_per_se,
2068                                    512 * 1024);
2069        if (r) {
2070                dev_err(adev->dev, "Failed to create Parameter Cache\n");
2071                goto err;
2072        }
2073
2074out:
2075        adev->gfx.ngg.init = true;
2076        return 0;
2077err:
2078        gfx_v9_0_ngg_fini(adev);
2079        return r;
2080}
2081
2082static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2083{
2084        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2085        int r;
2086        u32 data, base;
2087
2088        if (!amdgpu_ngg)
2089                return 0;
2090
2091        /* Program buffer size */
2092        data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2093                             adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2094        data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2095                             adev->gfx.ngg.buf[NGG_POS].size >> 8);
2096        WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2097
2098        data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2099                             adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2100        data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2101                             adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2102        WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2103
2104        /* Program buffer base address */
2105        base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2106        data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2107        WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2108
2109        base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2110        data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2111        WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2112
2113        base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2114        data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2115        WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2116
2117        base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2118        data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2119        WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2120
2121        base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2122        data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2123        WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2124
2125        base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2126        data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2127        WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2128
2129        /* Clear GDS reserved memory */
2130        r = amdgpu_ring_alloc(ring, 17);
2131        if (r) {
2132                DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2133                          ring->name, r);
2134                return r;
2135        }
2136
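        /*
         * Expose the reserved range to VMID0 by enlarging GDS_VMID0_SIZE,
         * clear it with a CP DMA_DATA write of zeroes, then set the VMID0
         * GDS size back to 0.
         */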
2137        gfx_v9_0_write_data_to_reg(ring, 0, false,
2138                                   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2139                                   (adev->gds.gds_size +
2140                                    adev->gfx.ngg.gds_reserve_size));
2141
2142        amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2143        amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2144                                PACKET3_DMA_DATA_DST_SEL(1) |
2145                                PACKET3_DMA_DATA_SRC_SEL(2)));
2146        amdgpu_ring_write(ring, 0);
2147        amdgpu_ring_write(ring, 0);
2148        amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2149        amdgpu_ring_write(ring, 0);
2150        amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2151                                adev->gfx.ngg.gds_reserve_size);
2152
2153        gfx_v9_0_write_data_to_reg(ring, 0, false,
2154                                   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2155
2156        amdgpu_ring_commit(ring);
2157
2158        return 0;
2159}
2160
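/*
 * Set up one compute ring: MEC0 is exposed as ME1, and each ring gets its
 * own GFX9_MEC_HPD_SIZE slice of the EOP buffer allocated in
 * gfx_v9_0_mec_init().
 */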
2161static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2162                                      int mec, int pipe, int queue)
2163{
2164        int r;
2165        unsigned irq_type;
2166        struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2169
2170        /* mec0 is me1 */
2171        ring->me = mec + 1;
2172        ring->pipe = pipe;
2173        ring->queue = queue;
2174
2175        ring->ring_obj = NULL;
2176        ring->use_doorbell = true;
2177        ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2178        ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2179                                + (ring_id * GFX9_MEC_HPD_SIZE);
2180        sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2181
2182        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2183                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2184                + ring->pipe;
2185
2186        /* type-2 packets are deprecated on MEC, use type-3 instead */
2187        r = amdgpu_ring_init(adev, ring, 1024,
2188                             &adev->gfx.eop_irq, irq_type);
2189        if (r)
2190                return r;
2191
2193        return 0;
2194}
2195
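/* IP block sw_init: interrupt sources, microcode, rings, KIQ/MQD and NGG setup. */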
2196static int gfx_v9_0_sw_init(void *handle)
2197{
2198        int i, j, k, r, ring_id;
2199        struct amdgpu_ring *ring;
2200        struct amdgpu_kiq *kiq;
2201        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2202
2203        switch (adev->asic_type) {
2204        case CHIP_VEGA10:
2205        case CHIP_VEGA12:
2206        case CHIP_VEGA20:
2207        case CHIP_RAVEN:
2208        case CHIP_ARCTURUS:
2209        case CHIP_RENOIR:
2210                adev->gfx.mec.num_mec = 2;
2211                break;
2212        default:
2213                adev->gfx.mec.num_mec = 1;
2214                break;
2215        }
2216
2217        adev->gfx.mec.num_pipe_per_mec = 4;
2218        adev->gfx.mec.num_queue_per_pipe = 8;
2219
2220        /* EOP Event */
2221        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2222        if (r)
2223                return r;
2224
2225        /* Privileged reg */
2226        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2227                              &adev->gfx.priv_reg_irq);
2228        if (r)
2229                return r;
2230
2231        /* Privileged inst */
2232        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2233                              &adev->gfx.priv_inst_irq);
2234        if (r)
2235                return r;
2236
2237        /* ECC error */
2238        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2239                              &adev->gfx.cp_ecc_error_irq);
2240        if (r)
2241                return r;
2242
2243        /* FUE error */
2244        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2245                              &adev->gfx.cp_ecc_error_irq);
2246        if (r)
2247                return r;
2248
2249        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2250
2251        gfx_v9_0_scratch_init(adev);
2252
2253        r = gfx_v9_0_init_microcode(adev);
2254        if (r) {
2255                DRM_ERROR("Failed to load gfx firmware!\n");
2256                return r;
2257        }
2258
2259        r = adev->gfx.rlc.funcs->init(adev);
2260        if (r) {
2261                DRM_ERROR("Failed to init rlc BOs!\n");
2262                return r;
2263        }
2264
2265        r = gfx_v9_0_mec_init(adev);
2266        if (r) {
2267                DRM_ERROR("Failed to init MEC BOs!\n");
2268                return r;
2269        }
2270
2271        /* set up the gfx ring */
2272        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2273                ring = &adev->gfx.gfx_ring[i];
2274                ring->ring_obj = NULL;
2275                if (!i)
2276                        sprintf(ring->name, "gfx");
2277                else
2278                        sprintf(ring->name, "gfx_%d", i);
2279                ring->use_doorbell = true;
2280                ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2281                r = amdgpu_ring_init(adev, ring, 1024,
2282                                     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2283                if (r)
2284                        return r;
2285        }
2286
2287        /* set up the compute queues - allocate horizontally across pipes */
2288        ring_id = 0;
2289        for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2290                for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2291                        for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2292                                if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2293                                        continue;
2294
2295                                r = gfx_v9_0_compute_ring_init(adev,
2296                                                               ring_id,
2297                                                               i, k, j);
2298                                if (r)
2299                                        return r;
2300
2301                                ring_id++;
2302                        }
2303                }
2304        }
2305
2306        r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2307        if (r) {
2308                DRM_ERROR("Failed to init KIQ BOs!\n");
2309                return r;
2310        }
2311
2312        kiq = &adev->gfx.kiq;
2313        r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2314        if (r)
2315                return r;
2316
2317        /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2318        r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2319        if (r)
2320                return r;
2321
2322        adev->gfx.ce_ram_size = 0x8000;
2323
2324        r = gfx_v9_0_gpu_early_init(adev);
2325        if (r)
2326                return r;
2327
2328        r = gfx_v9_0_ngg_init(adev);
2329        if (r)
2330                return r;
2331
2332        return 0;
2333}
2334
2335
2336static int gfx_v9_0_sw_fini(void *handle)
2337{
2338        int i;
2339        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2340
2341        if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2342                        adev->gfx.ras_if) {
2343                struct ras_common_if *ras_if = adev->gfx.ras_if;
2344                struct ras_ih_if ih_info = {
2345                        .head = *ras_if,
2346                };
2347
2348                amdgpu_ras_debugfs_remove(adev, ras_if);
2349                amdgpu_ras_sysfs_remove(adev, ras_if);
2350                amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2351                amdgpu_ras_feature_enable(adev, ras_if, 0);
2352                kfree(ras_if);
2353        }
2354
2355        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2356                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2357        for (i = 0; i < adev->gfx.num_compute_rings; i++)
2358                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2359
2360        amdgpu_gfx_mqd_sw_fini(adev);
2361        amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2362        amdgpu_gfx_kiq_fini(adev);
2363
2364        gfx_v9_0_mec_fini(adev);
2365        gfx_v9_0_ngg_fini(adev);
2366        amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2367        if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2368                amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2369                                &adev->gfx.rlc.cp_table_gpu_addr,
2370                                (void **)&adev->gfx.rlc.cp_table_ptr);
2371        }
2372        gfx_v9_0_free_microcode(adev);
2373
2374        return 0;
2375}
2376
2377
2378static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2379{
2380        /* TODO */
2381}
2382
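/*
 * Select which SE/SH/instance the following GRBM-indexed register accesses
 * target; passing 0xffffffff for a field sets the corresponding broadcast
 * bit so the access goes to all units of that kind.
 */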
2383static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2384{
2385        u32 data;
2386
2387        if (instance == 0xffffffff)
2388                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2389        else
2390                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2391
2392        if (se_num == 0xffffffff)
2393                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2394        else
2395                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2396
2397        if (sh_num == 0xffffffff)
2398                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2399        else
2400                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2401
2402        WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2403}
2404
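/*
 * Return a bitmap of the render backends active for the currently selected
 * SE/SH: the disable masks from the fuse (CC) and user (GC_USER) registers
 * are OR'd together, then inverted and clipped to the number of RBs per SH.
 */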
2405static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2406{
2407        u32 data, mask;
2408
2409        data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2410        data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2411
2412        data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2413        data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2414
2415        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2416                                         adev->gfx.config.max_sh_per_se);
2417
2418        return (~data) & mask;
2419}
2420
2421static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2422{
2423        int i, j;
2424        u32 data;
2425        u32 active_rbs = 0;
2426        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2427                                        adev->gfx.config.max_sh_per_se;
2428
2429        mutex_lock(&adev->grbm_idx_mutex);
2430        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2431                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2432                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2433                        data = gfx_v9_0_get_rb_active_bitmap(adev);
2434                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2435                                               rb_bitmap_width_per_sh);
2436                }
2437        }
2438        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2439        mutex_unlock(&adev->grbm_idx_mutex);
2440
2441        adev->gfx.config.backend_enable_mask = active_rbs;
2442        adev->gfx.config.num_rbs = hweight32(active_rbs);
2443}
2444
2445#define DEFAULT_SH_MEM_BASES    (0x6000)
2446#define FIRST_COMPUTE_VMID      (8)
2447#define LAST_COMPUTE_VMID       (16)
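/*
 * VMIDs 8..15 are set aside for compute; each of them gets the same fixed
 * SH_MEM aperture configuration programmed below.
 */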
2448static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2449{
2450        int i;
2451        uint32_t sh_mem_config;
2452        uint32_t sh_mem_bases;
2453
2454        /*
2455         * Configure apertures:
2456         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2457         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2458         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2459         */
2460        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2461
2462        sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2463                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2464                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2465
2466        mutex_lock(&adev->srbm_mutex);
2467        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2468                soc15_grbm_select(adev, 0, 0, 0, i);
2469                /* CP and shaders */
2470                WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2471                WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2472        }
2473        soc15_grbm_select(adev, 0, 0, 0, 0);
2474        mutex_unlock(&adev->srbm_mutex);
2475
2476        /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2477           access. These should be enabled by FW for target VMIDs. */
2478        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2479                WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2480                WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2481                WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2482                WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2483        }
2484}
2485
2486static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2487{
2488        int vmid;
2489
2490        /*
2491         * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2492         * access. Compute VMIDs should be enabled by FW for target VMIDs;
2493         * the driver can enable them for graphics. VMID0 should maintain
2494         * access so that HWS firmware can save/restore entries.
2495         */
2496        for (vmid = 1; vmid < 16; vmid++) {
2497                WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2498                WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2499                WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2500                WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2501        }
2502}
2503
2504static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2505{
2506        u32 tmp;
2507        int i;
2508
2509        WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2510
2511        gfx_v9_0_tiling_mode_table_init(adev);
2512
2513        gfx_v9_0_setup_rb(adev);
2514        gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2515        adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2516
2517        /* XXX SH_MEM regs */
2518        /* where to put LDS, scratch, GPUVM in FSA64 space */
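        /*
         * VMID 0 keeps SH_MEM_BASES at 0, while every other VMID gets its
         * private and shared aperture bases (bits 63:48 of the GMC aperture
         * start addresses) programmed below.
         */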
2519        mutex_lock(&adev->srbm_mutex);
2520        for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2521                soc15_grbm_select(adev, 0, 0, 0, i);
2522                /* CP and shaders */
2523                if (i == 0) {
2524                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2525                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2526                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2527                                            !!amdgpu_noretry);
2528                        WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2529                        WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2530                } else {
2531                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2532                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2533                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2534                                            !!amdgpu_noretry);
2535                        WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2536                        tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2537                                (adev->gmc.private_aperture_start >> 48));
2538                        tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2539                                (adev->gmc.shared_aperture_start >> 48));
2540                        WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2541                }
2542        }
2543        soc15_grbm_select(adev, 0, 0, 0, 0);
2544
2545        mutex_unlock(&adev->srbm_mutex);
2546
2547        gfx_v9_0_init_compute_vmid(adev);
2548        gfx_v9_0_init_gds_vmid(adev);
2549}
2550
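/*
 * Poll until the RLC serdes CU masters report idle on every SE/SH, then
 * wait for the non-CU masters (SE/GC/TC0/TC1) to go idle as well; each
 * poll gives up after adev->usec_timeout iterations.
 */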
2551static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2552{
2553        u32 i, j, k;
2554        u32 mask;
2555
2556        mutex_lock(&adev->grbm_idx_mutex);
2557        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2558                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2559                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2560                        for (k = 0; k < adev->usec_timeout; k++) {
2561                                if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2562                                        break;
2563                                udelay(1);
2564                        }
2565                        if (k == adev->usec_timeout) {
2566                                gfx_v9_0_select_se_sh(adev, 0xffffffff,
2567                                                      0xffffffff, 0xffffffff);
2568                                mutex_unlock(&adev->grbm_idx_mutex);
2569                                DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2570                                         i, j);
2571                                return;
2572                        }
2573                }
2574        }
2575        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2576        mutex_unlock(&adev->grbm_idx_mutex);
2577
2578        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2579                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2580                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2581                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2582        for (k = 0; k < adev->usec_timeout; k++) {
2583                if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2584                        break;
2585                udelay(1);
2586        }
2587}
2588
2589static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2590                                               bool enable)
2591{
2592        u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2593
2594        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2595        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2596        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2597        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2598
2599        WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2600}
2601
2602static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2603{
2604        /* csib */
2605        WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2606                        adev->gfx.rlc.clear_state_gpu_addr >> 32);
2607        WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2608                        adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2609        WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2610                        adev->gfx.rlc.clear_state_size);
2611}
2612
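/*
 * Walk the RLC register_list_format blob starting at the indirect portion:
 * record where each indirect block begins and collect the set of unique
 * indirect register offsets (each block is terminated by 0xFFFFFFFF).
 */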
2613static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2614                                int indirect_offset,
2615                                int list_size,
2616                                int *unique_indirect_regs,
2617                                int unique_indirect_reg_count,
2618                                int *indirect_start_offsets,
2619                                int *indirect_start_offsets_count,
2620                                int max_start_offsets_count)
2621{
2622        int idx;
2623
2624        for (; indirect_offset < list_size; indirect_offset++) {
2625                WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2626                indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2627                *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2628
2629                while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2630                        indirect_offset += 2;
2631
2632                        /* look for the matching index */
2633                        for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2634                                if (unique_indirect_regs[idx] ==
2635                                        register_list_format[indirect_offset] ||
2636                                        !unique_indirect_regs[idx])
2637                                        break;
2638                        }
2639
2640                        BUG_ON(idx >= unique_indirect_reg_count);
2641
2642                        if (!unique_indirect_regs[idx])
2643                                unique_indirect_regs[idx] = register_list_format[indirect_offset];
2644
2645                        indirect_offset++;
2646                }
2647        }
2648}
2649
2650static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2651{
2652        int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2653        int unique_indirect_reg_count = 0;
2654
2655        int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2656        int indirect_start_offsets_count = 0;
2657
2658        int list_size = 0;
2659        int i = 0, j = 0;
2660        u32 tmp = 0;
2661
2662        u32 *register_list_format =
2663                kmemdup(adev->gfx.rlc.register_list_format,
2664                        adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2665        if (!register_list_format)
2666                return -ENOMEM;
2667
2668        /* setup unique_indirect_regs array and indirect_start_offsets array */
2669        unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2670        gfx_v9_1_parse_ind_reg_list(register_list_format,
2671                                    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2672                                    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2673                                    unique_indirect_regs,
2674                                    unique_indirect_reg_count,
2675                                    indirect_start_offsets,
2676                                    &indirect_start_offsets_count,
2677                                    ARRAY_SIZE(indirect_start_offsets));
2678
2679        /* enable auto inc in case it is disabled */
2680        tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2681        tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2682        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2683
2684        /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2685        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2686                RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2687        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2688                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2689                        adev->gfx.rlc.register_restore[i]);
2690
2691        /* load indirect register */
2692        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2693                adev->gfx.rlc.reg_list_format_start);
2694
2695        /* direct register portion */
2696        for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2697                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2698                        register_list_format[i]);
2699
2700        /* indirect register portion */
2701        while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2702                if (register_list_format[i] == 0xFFFFFFFF) {
2703                        WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2704                        continue;
2705                }
2706
2707                WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2708                WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2709
2710                for (j = 0; j < unique_indirect_reg_count; j++) {
2711                        if (register_list_format[i] == unique_indirect_regs[j]) {
2712                                WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2713                                break;
2714                        }
2715                }
2716
2717                BUG_ON(j >= unique_indirect_reg_count);
2718
2719                i++;
2720        }
2721
2722        /* set save/restore list size */
2723        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2724        list_size = list_size >> 1;
2725        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2726                adev->gfx.rlc.reg_restore_list_size);
2727        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2728
2729        /* write the starting offsets to RLC scratch ram */
2730        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2731                adev->gfx.rlc.starting_offsets_start);
2732        for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2733                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2734                       indirect_start_offsets[i]);
2735
2736        /* load unique indirect regs */
2737        for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2738                if (unique_indirect_regs[i] != 0) {
2739                        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2740                               + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2741                               unique_indirect_regs[i] & 0x3FFFF);
2742
2743                        WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2744                               + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2745                               unique_indirect_regs[i] >> 20);
2746                }
2747        }
2748
2749        kfree(register_list_format);
2750        return 0;
2751}
2752
2753static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2754{
2755        WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2756}
2757
2758static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2759                                             bool enable)
2760{
2761        uint32_t data = 0;
2762        uint32_t default_data = 0;
2763
2764        default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2765        if (enable) {
2766                /* enable GFXIP control over CGPG */
2767                data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2768                if (default_data != data)
2769                        WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2770
2771                /* update status */
2772                data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2773                data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2774                if (default_data != data)
2775                        WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2776        } else {
2777                /* restore GFXIP control over CGPG */
2778                data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2779                if (default_data != data)
2780                        WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2781        }
2782}
2783
2784static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2785{
2786        uint32_t data = 0;
2787
2788        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2789                              AMD_PG_SUPPORT_GFX_SMG |
2790                              AMD_PG_SUPPORT_GFX_DMG)) {
2791                /* init IDLE_POLL_COUNT = 0x60 */
2792                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2793                data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2794                data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2795                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2796
2797                /* init RLC PG Delay */
2798                data = 0;
2799                data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2800                data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2801                data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2802                data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2803                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2804
2805                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2806                data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2807                data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2808                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2809
2810                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2811                data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2812                data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2813                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2814
2815                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2816                data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2817
2818                /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2819                data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2820                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2821
2822                pwr_10_0_gfxip_control_over_cgpg(adev, true);
2823        }
2824}
2825
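/*
 * The RLC_PG_CNTL helpers below share one pattern: read the register,
 * toggle a single field, and write it back only if the value actually
 * changed, avoiding redundant register writes.
 */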
2826static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2827                                                bool enable)
2828{
2829        uint32_t data = 0;
2830        uint32_t default_data = 0;
2831
2832        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2833        data = REG_SET_FIELD(data, RLC_PG_CNTL,
2834                             SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2835                             enable ? 1 : 0);
2836        if (default_data != data)
2837                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2838}
2839
2840static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2841                                                bool enable)
2842{
2843        uint32_t data = 0;
2844        uint32_t default_data = 0;
2845
2846        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2847        data = REG_SET_FIELD(data, RLC_PG_CNTL,
2848                             SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2849                             enable ? 1 : 0);
2850        if (default_data != data)
2851                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2852}
2853
2854static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2855                                        bool enable)
2856{
2857        uint32_t data = 0;
2858        uint32_t default_data = 0;
2859
2860        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2861        data = REG_SET_FIELD(data, RLC_PG_CNTL,
2862                             CP_PG_DISABLE,
2863                             enable ? 0 : 1);
2864        if (default_data != data)
2865                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2866}
2867
2868static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2869                                                bool enable)
2870{
2871        uint32_t data, default_data;
2872
2873        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2874        data = REG_SET_FIELD(data, RLC_PG_CNTL,
2875                             GFX_POWER_GATING_ENABLE,
2876                             enable ? 1 : 0);
2877        if (default_data != data)
2878                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2879}
2880
2881static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2882                                                bool enable)
2883{
2884        uint32_t data, default_data;
2885
2886        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2887        data = REG_SET_FIELD(data, RLC_PG_CNTL,
2888                             GFX_PIPELINE_PG_ENABLE,
2889                             enable ? 1 : 0);
2890        if (default_data != data)
2891                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2892
2893        if (!enable)
2894                /* read any GFX register to wake up GFX */
2895                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2896}
2897
2898static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2899                                                       bool enable)
2900{
2901        uint32_t data, default_data;
2902
2903        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2904        data = REG_SET_FIELD(data, RLC_PG_CNTL,
2905                             STATIC_PER_CU_PG_ENABLE,
2906                             enable ? 1 : 0);
2907        if (default_data != data)
2908                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2909}
2910
2911static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2912                                                bool enable)
2913{
2914        uint32_t data, default_data;
2915
2916        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2917        data = REG_SET_FIELD(data, RLC_PG_CNTL,
2918                             DYN_PER_CU_PG_ENABLE,
2919                             enable ? 1 : 0);
2920        if (default_data != data)
2921                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2922}
2923
2924static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2925{
2926        gfx_v9_0_init_csb(adev);
2927
2928        /*
2929         * The RLC save/restore list is supported since RLC v2_1
2930         * and is required by the gfxoff feature.
2931         */
2932        if (adev->gfx.rlc.is_rlc_v2_1) {
2933                gfx_v9_1_init_rlc_save_restore_list(adev);
2934                gfx_v9_0_enable_save_restore_machine(adev);
2935        }
2936
2937        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2938                              AMD_PG_SUPPORT_GFX_SMG |
2939                              AMD_PG_SUPPORT_GFX_DMG |
2940                              AMD_PG_SUPPORT_CP |
2941                              AMD_PG_SUPPORT_GDS |
2942                              AMD_PG_SUPPORT_RLC_SMU_HS)) {
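                /* point the RLC at the CP jump table before any PG feature is set up */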
2943                WREG32(mmRLC_JUMP_TABLE_RESTORE,
2944                       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2945                gfx_v9_0_init_gfx_power_gating(adev);
2946        }
2947}
2948
2949void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2950{
2951        WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2952        gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2953        gfx_v9_0_wait_for_rlc_serdes(adev);
2954}
2955
2956static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2957{
2958        WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2959        udelay(50);
2960        WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2961        udelay(50);
2962}
2963
2964static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2965{
2966#ifdef AMDGPU_RLC_DEBUG_RETRY
2967        u32 rlc_ucode_ver;
2968#endif
2969
2970        WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2971        udelay(50);
2972
2973        /* APUs enable the CP interrupt after CP init, so only dGPUs enable it here */
2974        if (!(adev->flags & AMD_IS_APU)) {
2975                gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2976                udelay(50);
2977        }
2978
2979#ifdef AMDGPU_RLC_DEBUG_RETRY
2980        /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2981        rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2982        if (rlc_ucode_ver == 0x108) {
2983                DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2984                                rlc_ucode_ver, adev->gfx.rlc_fw_version);
2985                /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2986                 * default is 0x9C4 to create a 100us interval */
2987                WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2988                /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2989                 * to disable the page fault retry interrupts, default is
2990                 * 0x100 (256) */
2991                WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2992        }
2993#endif
2994}
2995
2996static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2997{
2998        const struct rlc_firmware_header_v2_0 *hdr;
2999        const __le32 *fw_data;
3000        unsigned i, fw_size;
3001
3002        if (!adev->gfx.rlc_fw)
3003                return -EINVAL;
3004
3005        hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3006        amdgpu_ucode_print_rlc_hdr(&hdr->header);
3007
3008        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3009                           le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3010        fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3011
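        /*
         * Legacy (non-PSP) load: set the ucode write address to the RLCG
         * start offset, stream the firmware words in, then leave the
         * firmware version in the address register (the CP loaders below
         * follow the same pattern).
         */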
3012        WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3013                        RLCG_UCODE_LOADING_START_ADDRESS);
3014        for (i = 0; i < fw_size; i++)
3015                WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3016        WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3017
3018        return 0;
3019}
3020
3021static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3022{
3023        int r;
3024
3025        if (amdgpu_sriov_vf(adev)) {
3026                gfx_v9_0_init_csb(adev);
3027                return 0;
3028        }
3029
3030        adev->gfx.rlc.funcs->stop(adev);
3031
3032        /* disable CG */
3033        WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3034
3035        gfx_v9_0_init_pg(adev);
3036
3037        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3038                /* legacy rlc firmware loading */
3039                r = gfx_v9_0_rlc_load_microcode(adev);
3040                if (r)
3041                        return r;
3042        }
3043
3044        switch (adev->asic_type) {
3045        case CHIP_RAVEN:
3046                if (amdgpu_lbpw == 0)
3047                        gfx_v9_0_enable_lbpw(adev, false);
3048                else
3049                        gfx_v9_0_enable_lbpw(adev, true);
3050                break;
3051        case CHIP_VEGA20:
3052                if (amdgpu_lbpw > 0)
3053                        gfx_v9_0_enable_lbpw(adev, true);
3054                else
3055                        gfx_v9_0_enable_lbpw(adev, false);
3056                break;
3057        default:
3058                break;
3059        }
3060
3061        adev->gfx.rlc.funcs->start(adev);
3062
3063        return 0;
3064}
3065
3066static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3067{
3068        int i;
3069        u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3070
3071        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3072        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3073        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3074        if (!enable) {
3075                for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3076                        adev->gfx.gfx_ring[i].sched.ready = false;
3077        }
3078        WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3079        udelay(50);
3080}
3081
3082static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3083{
3084        const struct gfx_firmware_header_v1_0 *pfp_hdr;
3085        const struct gfx_firmware_header_v1_0 *ce_hdr;
3086        const struct gfx_firmware_header_v1_0 *me_hdr;
3087        const __le32 *fw_data;
3088        unsigned i, fw_size;
3089
3090        if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3091                return -EINVAL;
3092
3093        pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3094                adev->gfx.pfp_fw->data;
3095        ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3096                adev->gfx.ce_fw->data;
3097        me_hdr = (const struct gfx_firmware_header_v1_0 *)
3098                adev->gfx.me_fw->data;
3099
3100        amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3101        amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3102        amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3103
3104        gfx_v9_0_cp_gfx_enable(adev, false);
3105
3106        /* PFP */
3107        fw_data = (const __le32 *)
3108                (adev->gfx.pfp_fw->data +
3109                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3110        fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3111        WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3112        for (i = 0; i < fw_size; i++)
3113                WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3114        WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3115
3116        /* CE */
3117        fw_data = (const __le32 *)
3118                (adev->gfx.ce_fw->data +
3119                 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3120        fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3121        WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3122        for (i = 0; i < fw_size; i++)
3123                WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3124        WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3125
3126        /* ME */
3127        fw_data = (const __le32 *)
3128                (adev->gfx.me_fw->data +
3129                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3130        fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3131        WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3132        for (i = 0; i < fw_size; i++)
3133                WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3134        WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3135
3136        return 0;
3137}
3138
3139static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3140{
3141        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3142        const struct cs_section_def *sect = NULL;
3143        const struct cs_extent_def *ext = NULL;
3144        int r, i, tmp;
3145
3146        /* init the CP */
3147        WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3148        WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3149
3150        gfx_v9_0_cp_gfx_enable(adev, true);
3151
3152        r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3153        if (r) {
3154                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3155                return r;
3156        }
3157
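        /*
         * Emit the clear-state preamble: context control, the SECT_CONTEXT
         * register ranges from gfx9_cs_data, a CLEAR_STATE packet and the
         * CE partition bases.
         */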
3158        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3159        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3160
3161        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3162        amdgpu_ring_write(ring, 0x80000000);
3163        amdgpu_ring_write(ring, 0x80000000);
3164
3165        for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3166                for (ext = sect->section; ext->extent != NULL; ++ext) {
3167                        if (sect->id == SECT_CONTEXT) {
3168                                amdgpu_ring_write(ring,
3169                                       PACKET3(PACKET3_SET_CONTEXT_REG,
3170                                               ext->reg_count));
3171                                amdgpu_ring_write(ring,
3172                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3173                                for (i = 0; i < ext->reg_count; i++)
3174                                        amdgpu_ring_write(ring, ext->extent[i]);
3175                        }
3176                }
3177        }
3178
3179        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3180        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3181
3182        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3183        amdgpu_ring_write(ring, 0);
3184
3185        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3186        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3187        amdgpu_ring_write(ring, 0x8000);
3188        amdgpu_ring_write(ring, 0x8000);
3189
3190        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3191        tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3192                (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3193        amdgpu_ring_write(ring, tmp);
3194        amdgpu_ring_write(ring, 0);
3195
3196        amdgpu_ring_commit(ring);
3197
3198        return 0;
3199}
3200
3201static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3202{
3203        struct amdgpu_ring *ring;
3204        u32 tmp;
3205        u32 rb_bufsz;
3206        u64 rb_addr, rptr_addr, wptr_gpu_addr;
3207
3208        /* Set the write pointer delay */
3209        WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3210
3211        /* set the RB to use vmid 0 */
3212        WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3213
3214        /* Set ring buffer size */
3215        ring = &adev->gfx.gfx_ring[0];
3216        rb_bufsz = order_base_2(ring->ring_size / 8);
3217        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3218        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3219#ifdef __BIG_ENDIAN
3220        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3221#endif
3222        WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3223
3224        /* Initialize the ring buffer's write pointers */
3225        ring->wptr = 0;
3226        WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3227        WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3228
3229        /* set the wb address whether it's enabled or not */
3230        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3231        WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3232        WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3233
3234        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3235        WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3236        WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3237
3238        mdelay(1);
3239        WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3240
3241        rb_addr = ring->gpu_addr >> 8;
3242        WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3243        WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3244
3245        tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3246        if (ring->use_doorbell) {
3247                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3248                                    DOORBELL_OFFSET, ring->doorbell_index);
3249                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3250                                    DOORBELL_EN, 1);
3251        } else {
3252                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3253        }
3254        WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3255
3256        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3257                        DOORBELL_RANGE_LOWER, ring->doorbell_index);
3258        WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3259
3260        WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3261                       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3262
3263
3264        /* start the ring */
3265        gfx_v9_0_cp_gfx_start(adev);
3266        ring->sched.ready = true;
3267
3268        return 0;
3269}
3270
3271static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3272{
3273        int i;
3274
3275        if (enable) {
3276                WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3277        } else {
3278                WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3279                        (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3280                for (i = 0; i < adev->gfx.num_compute_rings; i++)
3281                        adev->gfx.compute_ring[i].sched.ready = false;
3282                adev->gfx.kiq.ring.sched.ready = false;
3283        }
3284        udelay(50);
3285}
3286
3287static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3288{
3289        const struct gfx_firmware_header_v1_0 *mec_hdr;
3290        const __le32 *fw_data;
3291        unsigned i;
3292        u32 tmp;
3293
3294        if (!adev->gfx.mec_fw)
3295                return -EINVAL;
3296
3297        gfx_v9_0_cp_compute_enable(adev, false);
3298
3299        mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3300        amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3301
3302        fw_data = (const __le32 *)
3303                (adev->gfx.mec_fw->data +
3304                 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3305        tmp = 0;
3306        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3307        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3308        WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3309
3310        WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3311                adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3312        WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3313                upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3314
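        /*
         * The CPC instruction-cache base above already points at the MEC
         * ucode in memory; only the jump table is streamed through the
         * UCODE ADDR/DATA registers here.
         */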
3315        /* MEC1 */
3316        WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3317                         mec_hdr->jt_offset);
3318        for (i = 0; i < mec_hdr->jt_size; i++)
3319                WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3320                        le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3321
3322        WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3323                        adev->gfx.mec_fw_version);
3324        /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3325
3326        return 0;
3327}
3328
3329/* KIQ functions */
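/*
 * Tell the RLC which me/pipe/queue is the KIQ: the queue id is written into
 * the low byte of RLC_CP_SCHEDULERS, then written again with bit 7 set.
 */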
3330static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3331{
3332        uint32_t tmp;
3333        struct amdgpu_device *adev = ring->adev;
3334
3335        /* tell RLC which is KIQ queue */
3336        tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3337        tmp &= 0xffffff00;
3338        tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3339        WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3340        tmp |= 0x80;
3341        WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3342}
3343
3344static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3345{
3346        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3347        uint64_t queue_mask = 0;
3348        int r, i;
3349
3350        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3351                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3352                        continue;
3353
3354                /* This situation may be hit in the future if a new HW
3355                 * generation exposes more than 64 queues. If so, the
3356                 * definition of queue_mask needs updating */
3357                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3358                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3359                        break;
3360                }
3361
3362                queue_mask |= (1ull << i);
3363        }
3364
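        /* 8 dwords for SET_RESOURCES plus 7 dwords of MAP_QUEUES per compute ring */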
3365        r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3366        if (r) {
3367                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3368                return r;
3369        }
3370
3371        /* set resources */
3372        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3373        amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3374                          PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3375        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3376        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3377        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3378        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3379        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3380        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3381        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3382                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3383                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3384                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3385
3386                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3387                /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3388                amdgpu_ring_write(kiq_ring,
3389                                  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3390                                  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3391                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3392                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3393                                  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3394                                  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3395                                  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3396                                  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3397                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3398                amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3399                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3400                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3401                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3402                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3403        }
3404
3405        r = amdgpu_ring_test_helper(kiq_ring);
3406        if (r)
3407                DRM_ERROR("KCQ enable failed\n");
3408
3409        return r;
3410}
3411
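/*
 * Fill in the memory queue descriptor (MQD) for a compute ring; the CP
 * loads these values into the matching CP_HQD_* registers when the queue
 * is mapped, mirroring the by-hand HQD setup done for the KIQ below.
 */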
3412static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3413{
3414        struct amdgpu_device *adev = ring->adev;
3415        struct v9_mqd *mqd = ring->mqd_ptr;
3416        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3417        uint32_t tmp;
3418
3419        mqd->header = 0xC0310800;
3420        mqd->compute_pipelinestat_enable = 0x00000001;
3421        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3422        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3423        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3424        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3425        mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3426        mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3427        mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3428        mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3429        mqd->compute_misc_reserved = 0x00000003;
3430
3431        mqd->dynamic_cu_mask_addr_lo =
3432                lower_32_bits(ring->mqd_gpu_addr
3433                              + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3434        mqd->dynamic_cu_mask_addr_hi =
3435                upper_32_bits(ring->mqd_gpu_addr
3436                              + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3437
3438        eop_base_addr = ring->eop_gpu_addr >> 8;
3439        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3440        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3441
3442        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3443        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3444        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3445                        (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3446
3447        mqd->cp_hqd_eop_control = tmp;
3448
3449        /* enable doorbell? */
3450        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3451
3452        if (ring->use_doorbell) {
3453                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3454                                    DOORBELL_OFFSET, ring->doorbell_index);
3455                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3456                                    DOORBELL_EN, 1);
3457                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3458                                    DOORBELL_SOURCE, 0);
3459                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3460                                    DOORBELL_HIT, 0);
3461        } else {
3462                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3463                                         DOORBELL_EN, 0);
3464        }
3465
3466        mqd->cp_hqd_pq_doorbell_control = tmp;
3467
3468        /* disable the queue if it's active */
3469        ring->wptr = 0;
3470        mqd->cp_hqd_dequeue_request = 0;
3471        mqd->cp_hqd_pq_rptr = 0;
3472        mqd->cp_hqd_pq_wptr_lo = 0;
3473        mqd->cp_hqd_pq_wptr_hi = 0;
3474
3475        /* set the pointer to the MQD */
3476        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3477        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3478
3479        /* set MQD vmid to 0 */
3480        tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3481        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3482        mqd->cp_mqd_control = tmp;
3483
3484        /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3485        hqd_gpu_addr = ring->gpu_addr >> 8;
3486        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3487        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3488
3489        /* set up the HQD, this is similar to CP_RB0_CNTL */
3490        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3491        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3492                            (order_base_2(ring->ring_size / 4) - 1));
3493        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3494                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3495#ifdef __BIG_ENDIAN
3496        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3497#endif
3498        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3499        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3500        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3501        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3502        mqd->cp_hqd_pq_control = tmp;
3503
3504        /* set the wb address whether it's enabled or not */
3505        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3506        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3507        mqd->cp_hqd_pq_rptr_report_addr_hi =
3508                upper_32_bits(wb_gpu_addr) & 0xffff;
3509
3510        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3511        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3512        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3513        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3514
3515        tmp = 0;
3516        /* enable the doorbell if requested */
3517        if (ring->use_doorbell) {
3518                tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3519                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3520                                DOORBELL_OFFSET, ring->doorbell_index);
3521
3522                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3523                                         DOORBELL_EN, 1);
3524                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3525                                         DOORBELL_SOURCE, 0);
3526                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3527                                         DOORBELL_HIT, 0);
3528        }
3529
3530        mqd->cp_hqd_pq_doorbell_control = tmp;
3531
3532        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3533        ring->wptr = 0;
3534        mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3535
3536        /* set the vmid for the queue */
3537        mqd->cp_hqd_vmid = 0;
3538
3539        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3540        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3541        mqd->cp_hqd_persistent_state = tmp;
3542
3543        /* set MIN_IB_AVAIL_SIZE */
3544        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3545        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3546        mqd->cp_hqd_ib_control = tmp;
3547
3548        /* activate the queue */
3549        mqd->cp_hqd_active = 1;
3550
3551        return 0;
3552}
3553
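/*
 * Program the currently selected HQD registers directly from the MQD
 * image; this path is used for the KIQ itself, which is brought up by
 * register writes rather than through a MAP_QUEUES packet.
 */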
3554static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3555{
3556        struct amdgpu_device *adev = ring->adev;
3557        struct v9_mqd *mqd = ring->mqd_ptr;
3558        int j;
3559
3560        /* disable wptr polling */
3561        WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3562
3563        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3564               mqd->cp_hqd_eop_base_addr_lo);
3565        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3566               mqd->cp_hqd_eop_base_addr_hi);
3567
3568        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3569        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3570               mqd->cp_hqd_eop_control);
3571
3572        /* enable doorbell? */
3573        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3574               mqd->cp_hqd_pq_doorbell_control);
3575
3576        /* disable the queue if it's active */
3577        if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3578                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3579                for (j = 0; j < adev->usec_timeout; j++) {
3580                        if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3581                                break;
3582                        udelay(1);
3583                }
3584                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3585                       mqd->cp_hqd_dequeue_request);
3586                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3587                       mqd->cp_hqd_pq_rptr);
3588                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3589                       mqd->cp_hqd_pq_wptr_lo);
3590                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3591                       mqd->cp_hqd_pq_wptr_hi);
3592        }
3593
3594        /* set the pointer to the MQD */
3595        WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3596               mqd->cp_mqd_base_addr_lo);
3597        WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3598               mqd->cp_mqd_base_addr_hi);
3599
3600        /* set MQD vmid to 0 */
3601        WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3602               mqd->cp_mqd_control);
3603
3604        /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3605        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3606               mqd->cp_hqd_pq_base_lo);
3607        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3608               mqd->cp_hqd_pq_base_hi);
3609
3610        /* set up the HQD, this is similar to CP_RB0_CNTL */
3611        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3612               mqd->cp_hqd_pq_control);
3613
3614        /* set the wb address whether it's enabled or not */
3615        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3616                                mqd->cp_hqd_pq_rptr_report_addr_lo);
3617        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3618                                mqd->cp_hqd_pq_rptr_report_addr_hi);
3619
3620        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3621        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3622               mqd->cp_hqd_pq_wptr_poll_addr_lo);
3623        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3624               mqd->cp_hqd_pq_wptr_poll_addr_hi);
3625
3626        /* enable the doorbell if requested */
3627        if (ring->use_doorbell) {
3628                WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3629                                        (adev->doorbell_index.kiq * 2) << 2);
3630                WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3631                                        (adev->doorbell_index.userqueue_end * 2) << 2);
3632        }
3633
3634        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3635               mqd->cp_hqd_pq_doorbell_control);
3636
3637        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3638        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3639               mqd->cp_hqd_pq_wptr_lo);
3640        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3641               mqd->cp_hqd_pq_wptr_hi);
3642
3643        /* set the vmid for the queue */
3644        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3645
3646        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3647               mqd->cp_hqd_persistent_state);
3648
3649        /* activate the queue */
3650        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3651               mqd->cp_hqd_active);
3652
3653        if (ring->use_doorbell)
3654                WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3655
3656        return 0;
3657}
3658
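/*
 * Tear down the KIQ HQD: request a dequeue, wait for the queue to go
 * inactive (forcing CP_HQD_ACTIVE to 0 if the request times out), then
 * clear the remaining HQD state registers.
 */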
3659static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3660{
3661        struct amdgpu_device *adev = ring->adev;
3662        int j;
3663
3664        /* disable the queue if it's active */
3665        if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3666
3667                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3668
3669                for (j = 0; j < adev->usec_timeout; j++) {
3670                        if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3671                                break;
3672                        udelay(1);
3673                }
3674
3675                if (j == adev->usec_timeout) {
3676                        DRM_DEBUG("KIQ dequeue request failed.\n");
3677
3678                        /* Manual disable if dequeue request times out */
3679                        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3680                }
3681
3682                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3683                      0);
3684        }
3685
3686        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3687        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3688        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3689        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3690        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3691        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3692        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3693        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3694
3695        return 0;
3696}
3697
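/*
 * Initialize the KIQ queue. On a GPU reset the saved MQD backup is
 * restored and the HQD registers are reprogrammed; otherwise a fresh MQD
 * is built with gfx_v9_0_mqd_init() and a backup copy is kept for later
 * resets.
 */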
3698static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3699{
3700        struct amdgpu_device *adev = ring->adev;
3701        struct v9_mqd *mqd = ring->mqd_ptr;
3702        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3703
3704        gfx_v9_0_kiq_setting(ring);
3705
3706        if (adev->in_gpu_reset) { /* for GPU_RESET case */
3707                /* reset MQD to a clean status */
3708                if (adev->gfx.mec.mqd_backup[mqd_idx])
3709                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3710
3711                /* reset ring buffer */
3712                ring->wptr = 0;
3713                amdgpu_ring_clear_ring(ring);
3714
3715                mutex_lock(&adev->srbm_mutex);
3716                soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3717                gfx_v9_0_kiq_init_register(ring);
3718                soc15_grbm_select(adev, 0, 0, 0, 0);
3719                mutex_unlock(&adev->srbm_mutex);
3720        } else {
3721                memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3722                ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3723                ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3724                mutex_lock(&adev->srbm_mutex);
3725                soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3726                gfx_v9_0_mqd_init(ring);
3727                gfx_v9_0_kiq_init_register(ring);
3728                soc15_grbm_select(adev, 0, 0, 0, 0);
3729                mutex_unlock(&adev->srbm_mutex);
3730
3731                if (adev->gfx.mec.mqd_backup[mqd_idx])
3732                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3733        }
3734
3735        return 0;
3736}
3737
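/*
 * Initialize a compute (KCQ) queue MQD. Unlike the KIQ, the HQD registers
 * are not written directly here; the queues are mapped later through the
 * KIQ in gfx_v9_0_kiq_kcq_enable().
 */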
3738static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3739{
3740        struct amdgpu_device *adev = ring->adev;
3741        struct v9_mqd *mqd = ring->mqd_ptr;
3742        int mqd_idx = ring - &adev->gfx.compute_ring[0];
3743
3744        if (!adev->in_gpu_reset && !adev->in_suspend) {
3745                memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3746                ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3747                ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3748                mutex_lock(&adev->srbm_mutex);
3749                soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3750                gfx_v9_0_mqd_init(ring);
3751                soc15_grbm_select(adev, 0, 0, 0, 0);
3752                mutex_unlock(&adev->srbm_mutex);
3753
3754                if (adev->gfx.mec.mqd_backup[mqd_idx])
3755                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3756        } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3757                /* reset MQD to a clean status */
3758                if (adev->gfx.mec.mqd_backup[mqd_idx])
3759                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3760
3761                /* reset ring buffer */
3762                ring->wptr = 0;
3763                amdgpu_ring_clear_ring(ring);
3764        } else {
3765                amdgpu_ring_clear_ring(ring);
3766        }
3767
3768        return 0;
3769}
3770
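/*
 * Bring up the KIQ: map its MQD BO, initialize the queue and mark the
 * ring as ready to use.
 */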
3771static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3772{
3773        struct amdgpu_ring *ring;
3774        int r;
3775
3776        ring = &adev->gfx.kiq.ring;
3777
3778        r = amdgpu_bo_reserve(ring->mqd_obj, false);
3779        if (unlikely(r != 0))
3780                return r;
3781
3782        r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3783        if (unlikely(r != 0))
3784                return r;
3785
3786        gfx_v9_0_kiq_init_queue(ring);
3787        amdgpu_bo_kunmap(ring->mqd_obj);
3788        ring->mqd_ptr = NULL;
3789        amdgpu_bo_unreserve(ring->mqd_obj);
3790        ring->sched.ready = true;
3791        return 0;
3792}
3793
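/*
 * Bring up the compute queues: enable the compute MEs, initialize each
 * KCQ MQD and then map all of the queues through the KIQ.
 */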
3794static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3795{
3796        struct amdgpu_ring *ring = NULL;
3797        int r = 0, i;
3798
3799        gfx_v9_0_cp_compute_enable(adev, true);
3800
3801        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3802                ring = &adev->gfx.compute_ring[i];
3803
3804                r = amdgpu_bo_reserve(ring->mqd_obj, false);
3805                if (unlikely(r != 0))
3806                        goto done;
3807                r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3808                if (!r) {
3809                        r = gfx_v9_0_kcq_init_queue(ring);
3810                        amdgpu_bo_kunmap(ring->mqd_obj);
3811                        ring->mqd_ptr = NULL;
3812                }
3813                amdgpu_bo_unreserve(ring->mqd_obj);
3814                if (r)
3815                        goto done;
3816        }
3817
3818        r = gfx_v9_0_kiq_kcq_enable(adev);
3819done:
3820        return r;
3821}
3822
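/*
 * Top-level CP bring-up: load the CP microcode when the PSP is not used,
 * resume the KIQ, the GFX ring (not present on Arcturus) and the KCQs,
 * then run the ring tests.
 */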
3823static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3824{
3825        int r, i;
3826        struct amdgpu_ring *ring;
3827
3828        if (!(adev->flags & AMD_IS_APU))
3829                gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3830
3831        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3832                if (adev->asic_type != CHIP_ARCTURUS) {
3833                        /* legacy firmware loading */
3834                        r = gfx_v9_0_cp_gfx_load_microcode(adev);
3835                        if (r)
3836                                return r;
3837                }
3838
3839                r = gfx_v9_0_cp_compute_load_microcode(adev);
3840                if (r)
3841                        return r;
3842        }
3843
3844        r = gfx_v9_0_kiq_resume(adev);
3845        if (r)
3846                return r;
3847
3848        if (adev->asic_type != CHIP_ARCTURUS) {
3849                r = gfx_v9_0_cp_gfx_resume(adev);
3850                if (r)
3851                        return r;
3852        }
3853
3854        r = gfx_v9_0_kcq_resume(adev);
3855        if (r)
3856                return r;
3857
3858        if (adev->asic_type != CHIP_ARCTURUS) {
3859                ring = &adev->gfx.gfx_ring[0];
3860                r = amdgpu_ring_test_helper(ring);
3861                if (r)
3862                        return r;
3863        }
3864
3865        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3866                ring = &adev->gfx.compute_ring[i];
3867                amdgpu_ring_test_helper(ring);
3868        }
3869
3870        gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3871
3872        return 0;
3873}
3874
3875static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3876{
3877        if (adev->asic_type != CHIP_ARCTURUS)
3878                gfx_v9_0_cp_gfx_enable(adev, enable);
3879        gfx_v9_0_cp_compute_enable(adev, enable);
3880}
3881
3882static int gfx_v9_0_hw_init(void *handle)
3883{
3884        int r;
3885        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3886
3887        if (!amdgpu_sriov_vf(adev))
3888                gfx_v9_0_init_golden_registers(adev);
3889
3890        gfx_v9_0_constants_init(adev);
3891
3892        r = gfx_v9_0_csb_vram_pin(adev);
3893        if (r)
3894                return r;
3895
3896        r = adev->gfx.rlc.funcs->resume(adev);
3897        if (r)
3898                return r;
3899
3900        r = gfx_v9_0_cp_resume(adev);
3901        if (r)
3902                return r;
3903
3904        if (adev->asic_type != CHIP_ARCTURUS) {
3905                r = gfx_v9_0_ngg_en(adev);
3906                if (r)
3907                        return r;
3908        }
3909
3910        return r;
3911}
3912
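/*
 * Unmap all compute queues by submitting one UNMAP_QUEUES (RESET_QUEUES
 * action) packet per ring on the KIQ.
 */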
3913static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3914{
3915        int r, i;
3916        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3917
3918        r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3919        if (r)
3920                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3921
3922        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3923                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3924
3925                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3926                amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3927                                                PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3928                                                PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3929                                                PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3930                                                PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3931                amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3932                amdgpu_ring_write(kiq_ring, 0);
3933                amdgpu_ring_write(kiq_ring, 0);
3934                amdgpu_ring_write(kiq_ring, 0);
3935        }
3936        r = amdgpu_ring_test_helper(kiq_ring);
3937        if (r)
3938                DRM_ERROR("KCQ disable failed\n");
3939
3940        return r;
3941}
3942
3943static int gfx_v9_0_hw_fini(void *handle)
3944{
3945        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3946
3947        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3948        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3949        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3950
3951        /* disable the KCQs so the CPC stops touching memory that is no longer valid */
3952        gfx_v9_0_kcq_disable(adev);
3953
3954        if (amdgpu_sriov_vf(adev)) {
3955                gfx_v9_0_cp_gfx_enable(adev, false);
3956                /* must disable polling for SRIOV once the hw is finished, otherwise
3957                 * the CPC engine may keep fetching a WB address that is already
3958                 * invalid after the sw teardown and trigger a DMAR read error on
3959                 * the hypervisor side.
3960                 */
3961                WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3962                return 0;
3963        }
3964
3965        /* Use the deinitialize sequence from CAIL when unbinding the device from
3966         * the driver, otherwise the KIQ hangs when binding it back.
3967         */
3968        if (!adev->in_gpu_reset && !adev->in_suspend) {
3969                mutex_lock(&adev->srbm_mutex);
3970                soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3971                                adev->gfx.kiq.ring.pipe,
3972                                adev->gfx.kiq.ring.queue, 0);
3973                gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3974                soc15_grbm_select(adev, 0, 0, 0, 0);
3975                mutex_unlock(&adev->srbm_mutex);
3976        }
3977
3978        gfx_v9_0_cp_enable(adev, false);
3979        adev->gfx.rlc.funcs->stop(adev);
3980
3981        gfx_v9_0_csb_vram_unpin(adev);
3982
3983        return 0;
3984}
3985
3986static int gfx_v9_0_suspend(void *handle)
3987{
3988        return gfx_v9_0_hw_fini(handle);
3989}
3990
3991static int gfx_v9_0_resume(void *handle)
3992{
3993        return gfx_v9_0_hw_init(handle);
3994}
3995
3996static bool gfx_v9_0_is_idle(void *handle)
3997{
3998        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3999
4000        if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4001                                GRBM_STATUS, GUI_ACTIVE))
4002                return false;
4003        else
4004                return true;
4005}
4006
4007static int gfx_v9_0_wait_for_idle(void *handle)
4008{
4009        unsigned i;
4010        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4011
4012        for (i = 0; i < adev->usec_timeout; i++) {
4013                if (gfx_v9_0_is_idle(handle))
4014                        return 0;
4015                udelay(1);
4016        }
4017        return -ETIMEDOUT;
4018}
4019
4020static int gfx_v9_0_soft_reset(void *handle)
4021{
4022        u32 grbm_soft_reset = 0;
4023        u32 tmp;
4024        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4025
4026        /* GRBM_STATUS */
4027        tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4028        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4029                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4030                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4031                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4032                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4033                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4034                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4035                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4036                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4037                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4038        }
4039
4040        if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4041                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4042                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4043        }
4044
4045        /* GRBM_STATUS2 */
4046        tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4047        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4048                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4049                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4050
4051
4052        if (grbm_soft_reset) {
4053                /* stop the rlc */
4054                adev->gfx.rlc.funcs->stop(adev);
4055
4056                if (adev->asic_type != CHIP_ARCTURUS)
4057                        /* Disable GFX parsing/prefetching */
4058                        gfx_v9_0_cp_gfx_enable(adev, false);
4059
4060                /* Disable MEC parsing/prefetching */
4061                gfx_v9_0_cp_compute_enable(adev, false);
4062
4063                if (grbm_soft_reset) {
4064                        tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4065                        tmp |= grbm_soft_reset;
4066                        dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4067                        WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4068                        tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4069
4070                        udelay(50);
4071
4072                        tmp &= ~grbm_soft_reset;
4073                        WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4074                        tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4075                }
4076
4077                /* Wait a little for things to settle down */
4078                udelay(50);
4079        }
4080        return 0;
4081}
4082
4083static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4084{
4085        uint64_t clock;
4086
4087        mutex_lock(&adev->gfx.gpu_clock_mutex);
4088        WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4089        clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4090                ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4091        mutex_unlock(&adev->gfx.gpu_clock_mutex);
4092        return clock;
4093}
4094
4095static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4096                                          uint32_t vmid,
4097                                          uint32_t gds_base, uint32_t gds_size,
4098                                          uint32_t gws_base, uint32_t gws_size,
4099                                          uint32_t oa_base, uint32_t oa_size)
4100{
4101        struct amdgpu_device *adev = ring->adev;
4102
4103        /* GDS Base */
4104        gfx_v9_0_write_data_to_reg(ring, 0, false,
4105                                   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4106                                   gds_base);
4107
4108        /* GDS Size */
4109        gfx_v9_0_write_data_to_reg(ring, 0, false,
4110                                   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4111                                   gds_size);
4112
4113        /* GWS */
4114        gfx_v9_0_write_data_to_reg(ring, 0, false,
4115                                   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4116                                   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4117
4118        /* OA */
4119        gfx_v9_0_write_data_to_reg(ring, 0, false,
4120                                   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4121                                   (1 << (oa_size + oa_base)) - (1 << oa_base));
4122}
4123
4124static const u32 vgpr_init_compute_shader[] =
4125{
4126        0xb07c0000, 0xbe8000ff,
4127        0x000000f8, 0xbf110800,
4128        0x7e000280, 0x7e020280,
4129        0x7e040280, 0x7e060280,
4130        0x7e080280, 0x7e0a0280,
4131        0x7e0c0280, 0x7e0e0280,
4132        0x80808800, 0xbe803200,
4133        0xbf84fff5, 0xbf9c0000,
4134        0xd28c0001, 0x0001007f,
4135        0xd28d0001, 0x0002027e,
4136        0x10020288, 0xb8810904,
4137        0xb7814000, 0xd1196a01,
4138        0x00000301, 0xbe800087,
4139        0xbefc00c1, 0xd89c4000,
4140        0x00020201, 0xd89cc080,
4141        0x00040401, 0x320202ff,
4142        0x00000800, 0x80808100,
4143        0xbf84fff8, 0x7e020280,
4144        0xbf810000, 0x00000000,
4145};
4146
4147static const u32 sgpr_init_compute_shader[] =
4148{
4149        0xb07c0000, 0xbe8000ff,
4150        0x0000005f, 0xbee50080,
4151        0xbe812c65, 0xbe822c65,
4152        0xbe832c65, 0xbe842c65,
4153        0xbe852c65, 0xb77c0005,
4154        0x80808500, 0xbf84fff8,
4155        0xbe800080, 0xbf810000,
4156};
4157
4158static const struct soc15_reg_entry vgpr_init_regs[] = {
4159   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4160   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4161   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4162   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4163   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4164   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4165   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4166   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4167   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4168   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4169};
4170
4171static const struct soc15_reg_entry sgpr_init_regs[] = {
4172   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4173   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4174   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4175   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4176   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4177   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4178   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4179   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4180   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4181   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4182};
4183
4184static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4185   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4186   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4187   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4188   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4189   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4190   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4191   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4192   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4193   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4194   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4195   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4196   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4197   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4198   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4199   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4200   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4201   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4202   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4203   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4204   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4205   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4206   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4207   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4208   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4209   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4210   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4211   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4212   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4213   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4214   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4215   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4216   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4217};
4218
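/*
 * EDC workaround for GDS: walk the entire GDS aperture for VMID0 with a
 * DMA_DATA packet and wait for the ring to drain, then restore the GDS
 * size register to 0.
 */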
4219static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4220{
4221        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4222        int i, r;
4223
4224        r = amdgpu_ring_alloc(ring, 7);
4225        if (r) {
4226                DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4227                        ring->name, r);
4228                return r;
4229        }
4230
4231        WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4232        WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4233
4234        amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4235        amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4236                                PACKET3_DMA_DATA_DST_SEL(1) |
4237                                PACKET3_DMA_DATA_SRC_SEL(2) |
4238                                PACKET3_DMA_DATA_ENGINE(0)));
4239        amdgpu_ring_write(ring, 0);
4240        amdgpu_ring_write(ring, 0);
4241        amdgpu_ring_write(ring, 0);
4242        amdgpu_ring_write(ring, 0);
4243        amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4244                                adev->gds.gds_size);
4245
4246        amdgpu_ring_commit(ring);
4247
4248        for (i = 0; i < adev->usec_timeout; i++) {
4249                if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4250                        break;
4251                udelay(1);
4252        }
4253
4254        if (i >= adev->usec_timeout)
4255                r = -ETIMEDOUT;
4256
4257        WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4258
4259        return r;
4260}
4261
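/*
 * EDC workaround for the GPRs: build an IB that dispatches the VGPR and
 * SGPR init shaders above, wait for it to finish, then read back the
 * SEC/DED counter registers to clear them.
 */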
4262static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4263{
4264        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4265        struct amdgpu_ib ib;
4266        struct dma_fence *f = NULL;
4267        int r, i, j, k;
4268        unsigned total_size, vgpr_offset, sgpr_offset;
4269        u64 gpu_addr;
4270
4271        /* only supported when RAS is enabled */
4272        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4273                return 0;
4274
4275        /* bail if the compute ring is not ready */
4276        if (!ring->sched.ready)
4277                return 0;
4278
4279        total_size =
4280                ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4281        total_size +=
4282                ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4283        total_size = ALIGN(total_size, 256);
4284        vgpr_offset = total_size;
4285        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4286        sgpr_offset = total_size;
4287        total_size += sizeof(sgpr_init_compute_shader);
4288
4289        /* allocate an indirect buffer to put the commands in */
4290        memset(&ib, 0, sizeof(ib));
4291        r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4292        if (r) {
4293                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4294                return r;
4295        }
4296
4297        /* load the compute shaders */
4298        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4299                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4300
4301        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4302                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4303
4304        /* init the ib length to 0 */
4305        ib.length_dw = 0;
4306
4307        /* VGPR */
4308        /* write the register state for the compute dispatch */
4309        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4310                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4311                ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4312                                                                - PACKET3_SET_SH_REG_START;
4313                ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4314        }
4315        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4316        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4317        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4318        ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4319                                                        - PACKET3_SET_SH_REG_START;
4320        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4321        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4322
4323        /* write dispatch packet */
4324        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4325        ib.ptr[ib.length_dw++] = 128; /* x */
4326        ib.ptr[ib.length_dw++] = 1; /* y */
4327        ib.ptr[ib.length_dw++] = 1; /* z */
4328        ib.ptr[ib.length_dw++] =
4329                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4330
4331        /* write CS partial flush packet */
4332        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4333        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4334
4335        /* SGPR */
4336        /* write the register state for the compute dispatch */
4337        for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4338                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4339                ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4340                                                                - PACKET3_SET_SH_REG_START;
4341                ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4342        }
4343        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4344        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4345        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4346        ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4347                                                        - PACKET3_SET_SH_REG_START;
4348        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4349        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4350
4351        /* write dispatch packet */
4352        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4353        ib.ptr[ib.length_dw++] = 128; /* x */
4354        ib.ptr[ib.length_dw++] = 1; /* y */
4355        ib.ptr[ib.length_dw++] = 1; /* z */
4356        ib.ptr[ib.length_dw++] =
4357                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4358
4359        /* write CS partial flush packet */
4360        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4361        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4362
4363        /* schedule the ib on the ring */
4364        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4365        if (r) {
4366                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4367                goto fail;
4368        }
4369
4370        /* wait for the GPU to finish processing the IB */
4371        r = dma_fence_wait(f, false);
4372        if (r) {
4373                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4374                goto fail;
4375        }
4376
4377        /* read back registers to clear the counters */
4378        mutex_lock(&adev->grbm_idx_mutex);
4379        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4380                for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4381                        for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4382                                gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4383                                RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4384                        }
4385                }
4386        }
4387        WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4388        mutex_unlock(&adev->grbm_idx_mutex);
4389
4390fail:
4391        amdgpu_ib_free(adev, &ib, NULL);
4392        dma_fence_put(f);
4393
4394        return r;
4395}
4396
4397static int gfx_v9_0_early_init(void *handle)
4398{
4399        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4400
4401        if (adev->asic_type == CHIP_ARCTURUS)
4402                adev->gfx.num_gfx_rings = 0;
4403        else
4404                adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4405        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4406        gfx_v9_0_set_ring_funcs(adev);
4407        gfx_v9_0_set_irq_funcs(adev);
4408        gfx_v9_0_set_gds_init(adev);
4409        gfx_v9_0_set_rlc_funcs(adev);
4410
4411        return 0;
4412}
4413
4414static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4415                struct ras_err_data *err_data,
4416                struct amdgpu_iv_entry *entry);
4417
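/*
 * Late RAS/ECC init for the GFX block: run the GDS and GPR EDC
 * workarounds, enable the RAS feature (handling the resume path where
 * ras_if already exists), and register the interrupt handler plus the
 * sysfs/debugfs nodes.
 */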
4418static int gfx_v9_0_ecc_late_init(void *handle)
4419{
4420        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4421        struct ras_common_if **ras_if = &adev->gfx.ras_if;
4422        struct ras_ih_if ih_info = {
4423                .cb = gfx_v9_0_process_ras_data_cb,
4424        };
4425        struct ras_fs_if fs_info = {
4426                .sysfs_name = "gfx_err_count",
4427                .debugfs_name = "gfx_err_inject",
4428        };
4429        struct ras_common_if ras_block = {
4430                .block = AMDGPU_RAS_BLOCK__GFX,
4431                .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4432                .sub_block_index = 0,
4433                .name = "gfx",
4434        };
4435        int r;
4436
4437        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4438                amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4439                return 0;
4440        }
4441
4442        r = gfx_v9_0_do_edc_gds_workarounds(adev);
4443        if (r)
4444                return r;
4445
4446        /* requires IBs so do in late init after IB pool is initialized */
4447        r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4448        if (r)
4449                return r;
4450
4451        /* handle resume path. */
4452        if (*ras_if) {
4453                /* resend ras TA enable cmd during resume.
4454                 * prepare to handle failure.
4455                 */
4456                ih_info.head = **ras_if;
4457                r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4458                if (r) {
4459                        if (r == -EAGAIN) {
4460                                /* request a gpu reset. will run again. */
4461                                amdgpu_ras_request_reset_on_boot(adev,
4462                                                AMDGPU_RAS_BLOCK__GFX);
4463                                return 0;
4464                        }
4465                        /* failed to enable ras, clean up everything. */
4466                        goto irq;
4467                }
4468                /* enabled successfully, continue. */
4469                goto resume;
4470        }
4471
4472        *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4473        if (!*ras_if)
4474                return -ENOMEM;
4475
4476        **ras_if = ras_block;
4477
4478        r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4479        if (r) {
4480                if (r == -EAGAIN) {
4481                        amdgpu_ras_request_reset_on_boot(adev,
4482                                        AMDGPU_RAS_BLOCK__GFX);
4483                        r = 0;
4484                }
4485                goto feature;
4486        }
4487
4488        ih_info.head = **ras_if;
4489        fs_info.head = **ras_if;
4490
4491        r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4492        if (r)
4493                goto interrupt;
4494
4495        amdgpu_ras_debugfs_create(adev, &fs_info);
4496
4497        r = amdgpu_ras_sysfs_create(adev, &fs_info);
4498        if (r)
4499                goto sysfs;
4500resume:
4501        r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4502        if (r)
4503                goto irq;
4504
4505        return 0;
4506irq:
4507        amdgpu_ras_sysfs_remove(adev, *ras_if);
4508sysfs:
4509        amdgpu_ras_debugfs_remove(adev, *ras_if);
4510        amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4511interrupt:
4512        amdgpu_ras_feature_enable(adev, *ras_if, 0);
4513feature:
4514        kfree(*ras_if);
4515        *ras_if = NULL;
4516        return r;
4517}
4518
4519static int gfx_v9_0_late_init(void *handle)
4520{
4521        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4522        int r;
4523
4524        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4525        if (r)
4526                return r;
4527
4528        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4529        if (r)
4530                return r;
4531
4532        r = gfx_v9_0_ecc_late_init(handle);
4533        if (r)
4534                return r;
4535
4536        return 0;
4537}
4538
4539static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4540{
4541        uint32_t rlc_setting;
4542
4543        /* if RLC is not enabled, do nothing */
4544        rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4545        if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4546                return false;
4547
4548        return true;
4549}
4550
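/*
 * Request RLC safe mode by writing the CMD/MESSAGE fields of
 * RLC_SAFE_MODE and poll until the RLC acknowledges the command.
 */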
4551static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4552{
4553        uint32_t data;
4554        unsigned i;
4555
4556        data = RLC_SAFE_MODE__CMD_MASK;
4557        data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4558        WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4559
4560        /* wait for RLC_SAFE_MODE */
4561        for (i = 0; i < adev->usec_timeout; i++) {
4562                if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4563                        break;
4564                udelay(1);
4565        }
4566}
4567
4568static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4569{
4570        uint32_t data;
4571
4572        data = RLC_SAFE_MODE__CMD_MASK;
4573        WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4574}
4575
4576static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4577                                                bool enable)
4578{
4579        amdgpu_gfx_rlc_enter_safe_mode(adev);
4580
4581        if (is_support_sw_smu(adev) && !enable)
4582                smu_set_gfx_cgpg(&adev->smu, enable);
4583
4584        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4585                gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4586                if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4587                        gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4588        } else {
4589                gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4590                gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4591        }
4592
4593        amdgpu_gfx_rlc_exit_safe_mode(adev);
4594}
4595
4596static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4597                                                bool enable)
4598{
4599        /* TODO: double check if we need to perform this under safe mode */
4600        /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4601
4602        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4603                gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4604        else
4605                gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4606
4607        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4608                gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4609        else
4610                gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4611
4612        /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4613}
4614
4615static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4616                                                      bool enable)
4617{
4618        uint32_t data, def;
4619
4620        amdgpu_gfx_rlc_enter_safe_mode(adev);
4621
4622        /* It is disabled by HW by default */
4623        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4624                /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4625                def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4626
4627                if (adev->asic_type != CHIP_VEGA12)
4628                        data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4629
4630                data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4631                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4632                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4633
4634                /* only for Vega10 & Raven1 */
4635                data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4636
4637                if (def != data)
4638                        WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4639
4640                /* MGLS is a global flag to control all MGLS in GFX */
4641                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4642                        /* 2 - RLC memory Light sleep */
4643                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4644                                def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4645                                data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4646                                if (def != data)
4647                                        WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4648                        }
4649                        /* 3 - CP memory Light sleep */
4650                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4651                                def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4652                                data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4653                                if (def != data)
4654                                        WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4655                        }
4656                }
4657        } else {
4658                /* 1 - MGCG_OVERRIDE */
4659                def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4660
4661                if (adev->asic_type != CHIP_VEGA12)
4662                        data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4663
4664                data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4665                         RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4666                         RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4667                         RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4668
4669                if (def != data)
4670                        WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4671
4672                /* 2 - disable MGLS in RLC */
4673                data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4674                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4675                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4676                        WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4677                }
4678
4679                /* 3 - disable MGLS in CP */
4680                data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4681                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4682                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4683                        WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4684                }
4685        }
4686
4687        amdgpu_gfx_rlc_exit_safe_mode(adev);
4688}
4689
4690static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4691                                           bool enable)
4692{
4693        uint32_t data, def;
4694
4695        if (adev->asic_type == CHIP_ARCTURUS)
4696                return;
4697
4698        amdgpu_gfx_rlc_enter_safe_mode(adev);
4699
4700        /* Enable 3D CGCG/CGLS */
4701        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4702                /* write cmd to clear cgcg/cgls ov */
4703                def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4704                /* unset CGCG override */
4705                data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4706                /* update CGCG and CGLS override bits */
4707                if (def != data)
4708                        WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4709
4710                /* enable 3Dcgcg FSM(0x0000363f) */
4711                def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4712
4713                data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4714                        RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4715                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4716                        data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4717                                RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4718                if (def != data)
4719                        WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4720
4721                /* set IDLE_POLL_COUNT(0x00900100) */
4722                def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4723                data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4724                        (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4725                if (def != data)
4726                        WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4727        } else {
4728                /* Disable CGCG/CGLS */
4729                def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4730                /* disable cgcg, cgls should be disabled */
4731                data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4732                          RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4733                /* disable cgcg and cgls in FSM */
4734                if (def != data)
4735                        WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4736        }
4737
4738        amdgpu_gfx_rlc_exit_safe_mode(adev);
4739}
4740
4741static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4742                                                      bool enable)
4743{
4744        uint32_t def, data;
4745
4746        amdgpu_gfx_rlc_enter_safe_mode(adev);
4747
4748        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4749                def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4750                /* unset CGCG override */
4751                data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4752                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4753                        data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4754                else
4755                        data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4756                /* update CGCG and CGLS override bits */
4757                if (def != data)
4758                        WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4759
4760                /* enable cgcg FSM(0x0000363F) */
4761                def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4762
4763                if (adev->asic_type == CHIP_ARCTURUS)
4764                        data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4765                                RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4766                else
4767                        data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4768                                RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4769                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4770                        data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4771                                RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4772                if (def != data)
4773                        WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4774
4775                /* set IDLE_POLL_COUNT(0x00900100) */
4776                def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4777                data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4778                        (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4779                if (def != data)
4780                        WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4781        } else {
4782                def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4783                /* reset CGCG/CGLS bits */
4784                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4785                /* disable cgcg and cgls in FSM */
4786                if (def != data)
4787                        WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4788        }
4789
4790        amdgpu_gfx_rlc_exit_safe_mode(adev);
4791}
4792
4793static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4794                                            bool enable)
4795{
4796        if (enable) {
4797                /* CGCG/CGLS should be enabled after MGCG/MGLS
4798                 * ===  MGCG + MGLS ===
4799                 */
4800                gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4801                /* ===  CGCG /CGLS for GFX 3D Only === */
4802                gfx_v9_0_update_3d_clock_gating(adev, enable);
4803                /* ===  CGCG + CGLS === */
4804                gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4805        } else {
4806                /* CGCG/CGLS should be disabled before MGCG/MGLS
4807                 * ===  CGCG + CGLS ===
4808                 */
4809                gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4810                /* ===  CGCG /CGLS for GFX 3D Only === */
4811                gfx_v9_0_update_3d_clock_gating(adev, enable);
4812                /* ===  MGCG + MGLS === */
4813                gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4814        }
4815        return 0;
4816}
4817
4818static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4819        .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4820        .set_safe_mode = gfx_v9_0_set_safe_mode,
4821        .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4822        .init = gfx_v9_0_rlc_init,
4823        .get_csb_size = gfx_v9_0_get_csb_size,
4824        .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4825        .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4826        .resume = gfx_v9_0_rlc_resume,
4827        .stop = gfx_v9_0_rlc_stop,
4828        .reset = gfx_v9_0_rlc_reset,
4829        .start = gfx_v9_0_rlc_start
4830};
4831
4832static int gfx_v9_0_set_powergating_state(void *handle,
4833                                          enum amd_powergating_state state)
4834{
4835        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4836        bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4837
4838        switch (adev->asic_type) {
4839        case CHIP_RAVEN:
4840        case CHIP_RENOIR:
4841                if (!enable) {
4842                        amdgpu_gfx_off_ctrl(adev, false);
4843                        cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4844                }
4845                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4846                        gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4847                        gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4848                } else {
4849                        gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4850                        gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4851                }
4852
4853                if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4854                        gfx_v9_0_enable_cp_power_gating(adev, true);
4855                else
4856                        gfx_v9_0_enable_cp_power_gating(adev, false);
4857
4858                /* update gfx cgpg state */
4859                if (is_support_sw_smu(adev) && enable)
4860                        smu_set_gfx_cgpg(&adev->smu, enable);
4861                gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4862
4863                /* update mg power gating state */
4864                gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4865
4866                if (enable)
4867                        amdgpu_gfx_off_ctrl(adev, true);
4868                break;
4869        case CHIP_VEGA12:
4870                if (!enable) {
4871                        amdgpu_gfx_off_ctrl(adev, false);
4872                        cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4873                } else {
4874                        amdgpu_gfx_off_ctrl(adev, true);
4875                }
4876                break;
4877        default:
4878                break;
4879        }
4880
4881        return 0;
4882}
4883
4884static int gfx_v9_0_set_clockgating_state(void *handle,
4885                                          enum amd_clockgating_state state)
4886{
4887        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4888
4889        if (amdgpu_sriov_vf(adev))
4890                return 0;
4891
4892        switch (adev->asic_type) {
4893        case CHIP_VEGA10:
4894        case CHIP_VEGA12:
4895        case CHIP_VEGA20:
4896        case CHIP_RAVEN:
4897        case CHIP_ARCTURUS:
4898        case CHIP_RENOIR:
4899                gfx_v9_0_update_gfx_clock_gating(adev,
4900                                                 state == AMD_CG_STATE_GATE);
4901                break;
4902        default:
4903                break;
4904        }
4905        return 0;
4906}
4907
4908static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4909{
4910        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4911        int data;
4912
4913        if (amdgpu_sriov_vf(adev))
4914                *flags = 0;
4915
4916        /* AMD_CG_SUPPORT_GFX_MGCG */
4917        data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4918        if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4919                *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4920
4921        /* AMD_CG_SUPPORT_GFX_CGCG */
4922        data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4923        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4924                *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4925
4926        /* AMD_CG_SUPPORT_GFX_CGLS */
4927        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4928                *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4929
4930        /* AMD_CG_SUPPORT_GFX_RLC_LS */
4931        data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4932        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4933                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4934
4935        /* AMD_CG_SUPPORT_GFX_CP_LS */
4936        data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4937        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4938                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4939
4940        if (adev->asic_type != CHIP_ARCTURUS) {
4941                /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4942                data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4943                if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4944                        *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4945
4946                /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4947                if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4948                        *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4949        }
4950}
4951
4952static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4953{
4954        return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4955}
4956
4957static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4958{
4959        struct amdgpu_device *adev = ring->adev;
4960        u64 wptr;
4961
4962        /* XXX check if swapping is necessary on BE */
4963        if (ring->use_doorbell) {
4964                wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4965        } else {
4966                wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4967                wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4968        }
4969
4970        return wptr;
4971}
4972
4973static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4974{
4975        struct amdgpu_device *adev = ring->adev;
4976
4977        if (ring->use_doorbell) {
4978                /* XXX check if swapping is necessary on BE */
4979                atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4980                WDOORBELL64(ring->doorbell_index, ring->wptr);
4981        } else {
4982                WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4983                WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4984        }
4985}
4986
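/*
 * Emit an HDP flush on the ring: pick the NBIO flush request bit for this
 * engine (CP0 for gfx, CP2/CP6 shifted by pipe for compute) and wait on the
 * NBIO "flush done" register until the HDP acknowledges the flush.
 */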
4987static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4988{
4989        struct amdgpu_device *adev = ring->adev;
4990        u32 ref_and_mask, reg_mem_engine;
4991        const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4992
4993        if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4994                switch (ring->me) {
4995                case 1:
4996                        ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4997                        break;
4998                case 2:
4999                        ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5000                        break;
5001                default:
5002                        return;
5003                }
5004                reg_mem_engine = 0;
5005        } else {
5006                ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5007                reg_mem_engine = 1; /* pfp */
5008        }
5009
5010        gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5011                              adev->nbio_funcs->get_hdp_flush_req_offset(adev),
5012                              adev->nbio_funcs->get_hdp_flush_done_offset(adev),
5013                              ref_and_mask, ref_and_mask, 0x20);
5014}
5015
5016static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5017                                        struct amdgpu_job *job,
5018                                        struct amdgpu_ib *ib,
5019                                        uint32_t flags)
5020{
5021        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5022        u32 header, control = 0;
5023
5024        if (ib->flags & AMDGPU_IB_FLAG_CE)
5025                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5026        else
5027                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5028
5029        control |= ib->length_dw | (vmid << 24);
5030
5031        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5032                control |= INDIRECT_BUFFER_PRE_ENB(1);
5033
5034                if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5035                        gfx_v9_0_ring_emit_de_meta(ring);
5036        }
5037
5038        amdgpu_ring_write(ring, header);
5039        BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5040        amdgpu_ring_write(ring,
5041#ifdef __BIG_ENDIAN
5042                (2 << 0) |
5043#endif
5044                lower_32_bits(ib->gpu_addr));
5045        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5046        amdgpu_ring_write(ring, control);
5047}
5048
5049static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5050                                          struct amdgpu_job *job,
5051                                          struct amdgpu_ib *ib,
5052                                          uint32_t flags)
5053{
5054        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5055        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5056
5057        /* Currently, there is a high likelihood of a wave ID mismatch
5058         * between ME and GDS, leading to a hw deadlock, because ME generates
5059         * different wave IDs than the GDS expects. This situation happens
5060         * randomly when at least 5 compute pipes use GDS ordered append.
5061         * The wave IDs generated by ME are also wrong after suspend/resume.
5062         * Those are probably bugs somewhere else in the kernel driver.
5063         *
5064         * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5065         * GDS to 0 for this ring (me/pipe).
5066         */
5067        if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5068                amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5069                amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5070                amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5071        }
5072
5073        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5074        BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5075        amdgpu_ring_write(ring,
5076#ifdef __BIG_ENDIAN
5077                                (2 << 0) |
5078#endif
5079                                lower_32_bits(ib->gpu_addr));
5080        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5081        amdgpu_ring_write(ring, control);
5082}
5083
5084static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5085                                     u64 seq, unsigned flags)
5086{
5087        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5088        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5089        bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5090
5091        /* RELEASE_MEM - flush caches, send int */
5092        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5093        amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5094                                               EOP_TC_NC_ACTION_EN) :
5095                                              (EOP_TCL1_ACTION_EN |
5096                                               EOP_TC_ACTION_EN |
5097                                               EOP_TC_WB_ACTION_EN |
5098                                               EOP_TC_MD_ACTION_EN)) |
5099                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5100                                 EVENT_INDEX(5)));
5101        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5102
5103        /*
5104         * The address must be Qword aligned for a 64-bit write, and Dword
5105         * aligned if we only send the low 32 bits (the high bits are discarded).
5106         */
5107        if (write64bit)
5108                BUG_ON(addr & 0x7);
5109        else
5110                BUG_ON(addr & 0x3);
5111        amdgpu_ring_write(ring, lower_32_bits(addr));
5112        amdgpu_ring_write(ring, upper_32_bits(addr));
5113        amdgpu_ring_write(ring, lower_32_bits(seq));
5114        amdgpu_ring_write(ring, upper_32_bits(seq));
5115        amdgpu_ring_write(ring, 0);
5116}
5117
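/*
 * Emit a pipeline sync: wait (in PFP for gfx rings, in ME for compute) until
 * the ring's last synced fence value has been written back, so previously
 * submitted work has completed before the following commands execute.
 */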
5118static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5119{
5120        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5121        uint32_t seq = ring->fence_drv.sync_seq;
5122        uint64_t addr = ring->fence_drv.gpu_addr;
5123
5124        gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5125                              lower_32_bits(addr), upper_32_bits(addr),
5126                              seq, 0xffffffff, 4);
5127}
5128
5129static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5130                                        unsigned vmid, uint64_t pd_addr)
5131{
5132        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5133
5134        /* compute doesn't have PFP */
5135        if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5136                /* sync PFP to ME, otherwise we might get invalid PFP reads */
5137                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5138                amdgpu_ring_write(ring, 0x0);
5139        }
5140}
5141
5142static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5143{
5144        return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5145}
5146
5147static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5148{
5149        u64 wptr;
5150
5151        /* XXX check if swapping is necessary on BE */
5152        if (ring->use_doorbell)
5153                wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5154        else
5155                BUG();
5156        return wptr;
5157}
5158
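/*
 * Adjust the SPI wave-launch percentage for this ring's pipe: restore the
 * full VALUE mask when the pipe holds a reservation (acquire) and throttle
 * it to a minimal value otherwise.
 */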
5159static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5160                                           bool acquire)
5161{
5162        struct amdgpu_device *adev = ring->adev;
5163        int pipe_num, tmp, reg;
5164        int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5165
5166        pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5167
5168        /* first me only has 2 entries, GFX and HP3D */
5169        if (ring->me > 0)
5170                pipe_num -= 2;
5171
5172        reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5173        tmp = RREG32(reg);
5174        tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5175        WREG32(reg, tmp);
5176}
5177
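/*
 * Track pipe reservations in pipe_reserve_bitmap under pipe_reserve_mutex.
 * When no reservations remain, every gfx and compute pipe gets its full
 * wave-launch percentage back; otherwise pipes without a reservation are
 * throttled.
 */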
5178static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5179                                            struct amdgpu_ring *ring,
5180                                            bool acquire)
5181{
5182        int i, pipe;
5183        bool reserve;
5184        struct amdgpu_ring *iring;
5185
5186        mutex_lock(&adev->gfx.pipe_reserve_mutex);
5187        pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5188        if (acquire)
5189                set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5190        else
5191                clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5192
5193        if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5194                /* Clear all reservations - everyone reacquires all resources */
5195                for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5196                        gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5197                                                       true);
5198
5199                for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5200                        gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5201                                                       true);
5202        } else {
5203                /* Lower all pipes without a current reservation */
5204                for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5205                        iring = &adev->gfx.gfx_ring[i];
5206                        pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5207                                                           iring->me,
5208                                                           iring->pipe,
5209                                                           0);
5210                        reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5211                        gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5212                }
5213
5214                for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5215                        iring = &adev->gfx.compute_ring[i];
5216                        pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5217                                                           iring->me,
5218                                                           iring->pipe,
5219                                                           0);
5220                        reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5221                        gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5222                }
5223        }
5224
5225        mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5226}
5227
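/*
 * Program the HQD pipe/queue priority registers for this ring via SRBM,
 * raising them when the queue acquires high priority and clearing them
 * back to zero when the priority is released.
 */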
5228static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5229                                      struct amdgpu_ring *ring,
5230                                      bool acquire)
5231{
5232        uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5233        uint32_t queue_priority = acquire ? 0xf : 0x0;
5234
5235        mutex_lock(&adev->srbm_mutex);
5236        soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5237
5238        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5239        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5240
5241        soc15_grbm_select(adev, 0, 0, 0, 0);
5242        mutex_unlock(&adev->srbm_mutex);
5243}
5244
5245static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5246                                               enum drm_sched_priority priority)
5247{
5248        struct amdgpu_device *adev = ring->adev;
5249        bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5250
5251        if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5252                return;
5253
5254        gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5255        gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5256}
5257
5258static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5259{
5260        struct amdgpu_device *adev = ring->adev;
5261
5262        /* XXX check if swapping is necessary on BE */
5263        if (ring->use_doorbell) {
5264                atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5265                WDOORBELL64(ring->doorbell_index, ring->wptr);
5266        } else {
5267                BUG(); /* only DOORBELL method supported on gfx9 now */
5268        }
5269}
5270
5271static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5272                                         u64 seq, unsigned int flags)
5273{
5274        struct amdgpu_device *adev = ring->adev;
5275
5276        /* we only allocate 32 bits for each seq wb address */
5277        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5278
5279        /* write fence seq to the "addr" */
5280        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5281        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5282                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5283        amdgpu_ring_write(ring, lower_32_bits(addr));
5284        amdgpu_ring_write(ring, upper_32_bits(addr));
5285        amdgpu_ring_write(ring, lower_32_bits(seq));
5286
5287        if (flags & AMDGPU_FENCE_FLAG_INT) {
5288                /* set register to trigger INT */
5289                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5290                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5291                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5292                amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5293                amdgpu_ring_write(ring, 0);
5294                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5295        }
5296}
5297
5298static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5299{
5300        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5301        amdgpu_ring_write(ring, 0);
5302}
5303
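/*
 * Write a zeroed CE IB state into the CSA's ce_payload so the CP has valid
 * CE metadata; emitted on SR-IOV gfx rings before the context control packet.
 */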
5304static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5305{
5306        struct v9_ce_ib_state ce_payload = {0};
5307        uint64_t csa_addr;
5308        int cnt;
5309
5310        cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5311        csa_addr = amdgpu_csa_vaddr(ring->adev);
5312
5313        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5314        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5315                                 WRITE_DATA_DST_SEL(8) |
5316                                 WR_CONFIRM) |
5317                                 WRITE_DATA_CACHE_POLICY(0));
5318        amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5319        amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5320        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5321}
5322
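/*
 * Write a zeroed DE IB state into the CSA's de_payload, with its GDS backup
 * address pointing just past the CSA; emitted for preemptible SR-IOV gfx IBs.
 */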
5323static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5324{
5325        struct v9_de_ib_state de_payload = {0};
5326        uint64_t csa_addr, gds_addr;
5327        int cnt;
5328
5329        csa_addr = amdgpu_csa_vaddr(ring->adev);
5330        gds_addr = csa_addr + 4096;
5331        de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5332        de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5333
5334        cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5335        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5336        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5337                                 WRITE_DATA_DST_SEL(8) |
5338                                 WR_CONFIRM) |
5339                                 WRITE_DATA_CACHE_POLICY(0));
5340        amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5341        amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5342        amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5343}
5344
5345static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5346{
5347        amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5348        amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5349}
5350
5351static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5352{
5353        uint32_t dw2 = 0;
5354
5355        if (amdgpu_sriov_vf(ring->adev))
5356                gfx_v9_0_ring_emit_ce_meta(ring);
5357
5358        gfx_v9_0_ring_emit_tmz(ring, true);
5359
5360        dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5361        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5362                /* set load_global_config & load_global_uconfig */
5363                dw2 |= 0x8001;
5364                /* set load_cs_sh_regs */
5365                dw2 |= 0x01000000;
5366                /* set load_per_context_state & load_gfx_sh_regs for GFX */
5367                dw2 |= 0x10002;
5368
5369                /* set load_ce_ram if a preamble is present */
5370                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5371                        dw2 |= 0x10000000;
5372        } else {
5373                /* still load_ce_ram if this is the first time a preamble is
5374                 * presented, even though no context switch happens.
5375                 */
5376                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5377                        dw2 |= 0x10000000;
5378        }
5379
5380        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5381        amdgpu_ring_write(ring, dw2);
5382        amdgpu_ring_write(ring, 0);
5383}
5384
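/*
 * Emit a COND_EXEC packet whose DW count is patched in later: the returned
 * ring offset points at the placeholder that
 * gfx_v9_0_ring_emit_patch_cond_exec() fills with the number of DWs to skip
 * when *cond_exe_gpu_addr reads back as zero.
 */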
5385static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5386{
5387        unsigned ret;
5388        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5389        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5390        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5391        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5392        ret = ring->wptr & ring->buf_mask;
5393        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5394        return ret;
5395}
5396
5397static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5398{
5399        unsigned cur;
5400        BUG_ON(offset > ring->buf_mask);
5401        BUG_ON(ring->ring[offset] != 0x55aa55aa);
5402
5403        cur = (ring->wptr & ring->buf_mask) - 1;
5404        if (likely(cur > offset))
5405                ring->ring[offset] = cur - offset;
5406        else
5407                ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5408}
5409
5410static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5411{
5412        struct amdgpu_device *adev = ring->adev;
5413
5414        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5415        amdgpu_ring_write(ring, 0 |     /* src: register*/
5416                                (5 << 8) |      /* dst: memory */
5417                                (1 << 20));     /* write confirm */
5418        amdgpu_ring_write(ring, reg);
5419        amdgpu_ring_write(ring, 0);
5420        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5421                                adev->virt.reg_val_offs * 4));
5422        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5423                                adev->virt.reg_val_offs * 4));
5424}
5425
5426static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5427                                    uint32_t val)
5428{
5429        uint32_t cmd = 0;
5430
5431        switch (ring->funcs->type) {
5432        case AMDGPU_RING_TYPE_GFX:
5433                cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5434                break;
5435        case AMDGPU_RING_TYPE_KIQ:
5436                cmd = (1 << 16); /* no inc addr */
5437                break;
5438        default:
5439                cmd = WR_CONFIRM;
5440                break;
5441        }
5442        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5443        amdgpu_ring_write(ring, cmd);
5444        amdgpu_ring_write(ring, reg);
5445        amdgpu_ring_write(ring, 0);
5446        amdgpu_ring_write(ring, val);
5447}
5448
5449static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5450                                        uint32_t val, uint32_t mask)
5451{
5452        gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5453}
5454
5455static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5456                                                  uint32_t reg0, uint32_t reg1,
5457                                                  uint32_t ref, uint32_t mask)
5458{
5459        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5460        struct amdgpu_device *adev = ring->adev;
5461        bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5462                adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5463
5464        if (fw_version_ok)
5465                gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5466                                      ref, mask, 0x20);
5467        else
5468                amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5469                                                           ref, mask);
5470}
5471
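/*
 * Soft recovery: issue an SQ_CMD scoped to the given VMID to stop its waves,
 * allowing a hung job to be recovered without a full GPU reset.
 */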
5472static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5473{
5474        struct amdgpu_device *adev = ring->adev;
5475        uint32_t value = 0;
5476
5477        value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5478        value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5479        value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5480        value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5481        WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5482}
5483
5484static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5485                                                 enum amdgpu_interrupt_state state)
5486{
5487        switch (state) {
5488        case AMDGPU_IRQ_STATE_DISABLE:
5489        case AMDGPU_IRQ_STATE_ENABLE:
5490                WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5491                               TIME_STAMP_INT_ENABLE,
5492                               state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5493                break;
5494        default:
5495                break;
5496        }
5497}
5498
5499static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5500                                                     int me, int pipe,
5501                                                     enum amdgpu_interrupt_state state)
5502{
5503        u32 mec_int_cntl, mec_int_cntl_reg;
5504
5505        /*
5506         * amdgpu controls only the first MEC. That's why this function only
5507         * handles the setting of interrupts for this specific MEC. All other
5508         * pipes' interrupts are set by amdkfd.
5509         */
5510
5511        if (me == 1) {
5512                switch (pipe) {
5513                case 0:
5514                        mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5515                        break;
5516                case 1:
5517                        mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5518                        break;
5519                case 2:
5520                        mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5521                        break;
5522                case 3:
5523                        mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5524                        break;
5525                default:
5526                        DRM_DEBUG("invalid pipe %d\n", pipe);
5527                        return;
5528                }
5529        } else {
5530                DRM_DEBUG("invalid me %d\n", me);
5531                return;
5532        }
5533
5534        switch (state) {
5535        case AMDGPU_IRQ_STATE_DISABLE:
5536                mec_int_cntl = RREG32(mec_int_cntl_reg);
5537                mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5538                                             TIME_STAMP_INT_ENABLE, 0);
5539                WREG32(mec_int_cntl_reg, mec_int_cntl);
5540                break;
5541        case AMDGPU_IRQ_STATE_ENABLE:
5542                mec_int_cntl = RREG32(mec_int_cntl_reg);
5543                mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5544                                             TIME_STAMP_INT_ENABLE, 1);
5545                WREG32(mec_int_cntl_reg, mec_int_cntl);
5546                break;
5547        default:
5548                break;
5549        }
5550}
5551
5552static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5553                                             struct amdgpu_irq_src *source,
5554                                             unsigned type,
5555                                             enum amdgpu_interrupt_state state)
5556{
5557        switch (state) {
5558        case AMDGPU_IRQ_STATE_DISABLE:
5559        case AMDGPU_IRQ_STATE_ENABLE:
5560                WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5561                               PRIV_REG_INT_ENABLE,
5562                               state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5563                break;
5564        default:
5565                break;
5566        }
5567
5568        return 0;
5569}
5570
5571static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5572                                              struct amdgpu_irq_src *source,
5573                                              unsigned type,
5574                                              enum amdgpu_interrupt_state state)
5575{
5576        switch (state) {
5577        case AMDGPU_IRQ_STATE_DISABLE:
5578        case AMDGPU_IRQ_STATE_ENABLE:
5579                WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5580                               PRIV_INSTR_INT_ENABLE,
5581                               state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5582        default:
5583                break;
5584        }
5585
5586        return 0;
5587}
5588
5589#define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5590        WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5591                        CP_ECC_ERROR_INT_ENABLE, 1)
5592
5593#define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5594        WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5595                        CP_ECC_ERROR_INT_ENABLE, 0)
5596
5597static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5598                                              struct amdgpu_irq_src *source,
5599                                              unsigned type,
5600                                              enum amdgpu_interrupt_state state)
5601{
5602        switch (state) {
5603        case AMDGPU_IRQ_STATE_DISABLE:
5604                WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5605                                CP_ECC_ERROR_INT_ENABLE, 0);
5606                DISABLE_ECC_ON_ME_PIPE(1, 0);
5607                DISABLE_ECC_ON_ME_PIPE(1, 1);
5608                DISABLE_ECC_ON_ME_PIPE(1, 2);
5609                DISABLE_ECC_ON_ME_PIPE(1, 3);
5610                break;
5611
5612        case AMDGPU_IRQ_STATE_ENABLE:
5613                WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5614                                CP_ECC_ERROR_INT_ENABLE, 1);
5615                ENABLE_ECC_ON_ME_PIPE(1, 0);
5616                ENABLE_ECC_ON_ME_PIPE(1, 1);
5617                ENABLE_ECC_ON_ME_PIPE(1, 2);
5618                ENABLE_ECC_ON_ME_PIPE(1, 3);
5619                break;
5620        default:
5621                break;
5622        }
5623
5624        return 0;
5625}
5626
5627
5628static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5629                                            struct amdgpu_irq_src *src,
5630                                            unsigned type,
5631                                            enum amdgpu_interrupt_state state)
5632{
5633        switch (type) {
5634        case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5635                gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5636                break;
5637        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5638                gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5639                break;
5640        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5641                gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5642                break;
5643        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5644                gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5645                break;
5646        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5647                gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5648                break;
5649        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5650                gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5651                break;
5652        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5653                gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5654                break;
5655        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5656                gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5657                break;
5658        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5659                gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5660                break;
5661        default:
5662                break;
5663        }
5664        return 0;
5665}
5666
5667static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5668                            struct amdgpu_irq_src *source,
5669                            struct amdgpu_iv_entry *entry)
5670{
5671        int i;
5672        u8 me_id, pipe_id, queue_id;
5673        struct amdgpu_ring *ring;
5674
5675        DRM_DEBUG("IH: CP EOP\n");
5676        me_id = (entry->ring_id & 0x0c) >> 2;
5677        pipe_id = (entry->ring_id & 0x03) >> 0;
5678        queue_id = (entry->ring_id & 0x70) >> 4;
5679
5680        switch (me_id) {
5681        case 0:
5682                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5683                break;
5684        case 1:
5685        case 2:
5686                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5687                        ring = &adev->gfx.compute_ring[i];
5688                        /* Per-queue interrupt is supported for MEC starting from VI.
5689                         * The interrupt can only be enabled/disabled per pipe instead of per queue.
5690                         */
5691                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5692                                amdgpu_fence_process(ring);
5693                }
5694                break;
5695        }
5696        return 0;
5697}
5698
5699static void gfx_v9_0_fault(struct amdgpu_device *adev,
5700                           struct amdgpu_iv_entry *entry)
5701{
5702        u8 me_id, pipe_id, queue_id;
5703        struct amdgpu_ring *ring;
5704        int i;
5705
5706        me_id = (entry->ring_id & 0x0c) >> 2;
5707        pipe_id = (entry->ring_id & 0x03) >> 0;
5708        queue_id = (entry->ring_id & 0x70) >> 4;
5709
5710        switch (me_id) {
5711        case 0:
5712                drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5713                break;
5714        case 1:
5715        case 2:
5716                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5717                        ring = &adev->gfx.compute_ring[i];
5718                        if (ring->me == me_id && ring->pipe == pipe_id &&
5719                            ring->queue == queue_id)
5720                                drm_sched_fault(&ring->sched);
5721                }
5722                break;
5723        }
5724}
5725
5726static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5727                                 struct amdgpu_irq_src *source,
5728                                 struct amdgpu_iv_entry *entry)
5729{
5730        DRM_ERROR("Illegal register access in command stream\n");
5731        gfx_v9_0_fault(adev, entry);
5732        return 0;
5733}
5734
5735static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5736                                  struct amdgpu_irq_src *source,
5737                                  struct amdgpu_iv_entry *entry)
5738{
5739        DRM_ERROR("Illegal instruction in command stream\n");
5740        gfx_v9_0_fault(adev, entry);
5741        return 0;
5742}
5743
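/*
 * RAS error callback: flag the SRAM ECC error to KFD, query the per-block
 * error counts if the hook is present, and then request a GPU reset.
 */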
5744static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5745                struct ras_err_data *err_data,
5746                struct amdgpu_iv_entry *entry)
5747{
5748        /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5749        kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5750        if (adev->gfx.funcs->query_ras_error_count)
5751                adev->gfx.funcs->query_ras_error_count(adev, err_data);
5752        amdgpu_ras_reset_gpu(adev, 0);
5753        return AMDGPU_RAS_SUCCESS;
5754}
5755
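/*
 * RAS EDC counter registers: one entry per counter with the register to read,
 * whether it is instanced per shader engine, the number of instances to walk,
 * and the SEC/DED (or SED) count field masks used when totalling error counts.
 */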
5756static const struct {
5757        const char *name;
5758        uint32_t ip;
5759        uint32_t inst;
5760        uint32_t seg;
5761        uint32_t reg_offset;
5762        uint32_t per_se_instance;
5763        int32_t num_instance;
5764        uint32_t sec_count_mask;
5765        uint32_t ded_count_mask;
5766} gfx_ras_edc_regs[] = {
5767        { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5768          REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5769          REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5770        { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5771          REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5772          REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5773        { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5774          REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5775        { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5776          REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5777        { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5778          REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5779          REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5780        { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5781          REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5782        { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5783          REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5784          REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5785        { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5786          REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5787          REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5788        { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5789          REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5790        { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5791          REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5792        { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5793          REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5794        { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5795          REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5796          REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5797        { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5798          REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5799        { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5800          0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5801          REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5802        { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5803          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5804          REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5805          REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5806        { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5807          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5808          REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5809        { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5810          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5811          REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5812          REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5813        { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5814          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5815          REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5816          REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5817        { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5818          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5819          REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5820          REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5821        { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5822          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5823          REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5824          REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5825        { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5826          REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5827        { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5828          REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5829          REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5830        { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5831          REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5832        { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5833          REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5834        { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5835          REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5836        { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5837          REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5838        { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5839          REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5840        { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5841          REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5842        { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5843          REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5844          REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5845        { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5846          REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5847          REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5848        { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5849          REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5850          REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5851        { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5852          REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5853          REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5854        { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5855          REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5856          REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5857        { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5858          REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5859        { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5860          REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5861        { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5862          REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5863        { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5864          REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5865        { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5866          REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5867        { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5868          REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5869        { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5870          REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5871        { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5872          REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5873        { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5874          16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5875        { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5876          0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5877          0 },
5878        { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5879          16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5880        { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5881          0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5882          0 },
5883        { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5884          16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5885        { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5886          REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5887        { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5888          REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5889          REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5890        { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5891          REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5892          REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5893        { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5894          REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5895        { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5896          REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5897        { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5898          REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5899        { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5900          REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5901          REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5902        { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5903          REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5904          REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5905        { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5906          REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5907          REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5908        { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5909          REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5910          REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5911        { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5912          REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5913        { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5914          REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5915          REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5916        { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5917          REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5918          REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5919        { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5920          REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5921          REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5922        { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5923          REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5924          REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5925        { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5926          REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5927          REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5928        { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5929          REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5930          REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5931        { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5932          REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5933          REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5934        { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5935          1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5936          REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5937        { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5938          6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5939          REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5940        { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5941          1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5942          REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5943        { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5944          6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5945          REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5946        { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5947          1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5948          REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5949        { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5950          6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5951          REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5952        { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5953          6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5954          REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5955        { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5956          6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5957          REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5958        { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5959          6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5960          REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5961        { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5962          6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5963          REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5964        { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5965          SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5966          REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5967          0 },
5968        { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5969          6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5970        { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5971          6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5972        { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5973          6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5974        { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5975          SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5976          REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5977        { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5978          REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5979          REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5980        { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5981          6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5982          REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5983        { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5984          6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5985          REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5986        { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5987          6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5988          REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5989        { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5990          6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5991          REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5992        { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5993          SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5994          REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5995          0 },
5996        { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5997          6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5998        { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5999          6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
6000        { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
6001          6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
6002        { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
6003          SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
6004          REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
6005        { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6006          REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6007          REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
6008        { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6009          REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6010          REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
6011        { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6012          REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6013          REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
6014        { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6015          REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6016          REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
6017        { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6018          REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6019          REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
6020        { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6021          REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
6022        { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6023          REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6024        { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6025          REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6026        { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6027          REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6028        { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6029          REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6030        { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6031          REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6032          REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6033        { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6034          REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6035          REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6036        { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6037          REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6038          REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6039        { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6040          REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6041        { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6042          REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6043        { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6044          REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6045        { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6046          REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6047        { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6048          REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6049        { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6050          REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6051};
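    /*
     * In the table above, SEC = single-error corrected, DED = double-error
     * detected and SED = single-error detected (detection only, no
     * correction).  Entries with a zero DED mask expose only an SED count,
     * which gfx_v9_0_query_ras_error_count() below reports as correctable.
     */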
6052
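    /*
     * gfx_v9_0_ras_error_inject - inject a RAS error into a GFX sub-block
     *
     * Validates the requested sub-block index and error type against the
     * hardware- and driver-supported masks in ras_gfx_subblocks, then asks
     * the PSP RAS TA to trigger the error via psp_ras_trigger_error().
     * Only supported on Vega20.
     */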
6053static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6054                                     void *inject_if)
6055{
6056        struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6057        int ret;
6058        struct ta_ras_trigger_error_input block_info = { 0 };
6059
6060        if (adev->asic_type != CHIP_VEGA20)
6061                return -EINVAL;
6062
6063        if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6064                return -EINVAL;
6065
6066        if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6067                return -EPERM;
6068
6069        if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6070              info->head.type)) {
6071                DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6072                        ras_gfx_subblocks[info->head.sub_block_index].name,
6073                        info->head.type);
6074                return -EPERM;
6075        }
6076
6077        if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6078              info->head.type)) {
6079                DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6080                        ras_gfx_subblocks[info->head.sub_block_index].name,
6081                        info->head.type);
6082                return -EPERM;
6083        }
6084
6085        block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6086        block_info.sub_block_index =
6087                ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6088        block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6089        block_info.address = info->address;
6090        block_info.value = info->value;
6091
6092        mutex_lock(&adev->grbm_idx_mutex);
6093        ret = psp_ras_trigger_error(&adev->psp, &block_info);
6094        mutex_unlock(&adev->grbm_idx_mutex);
6095
6096        return ret;
6097}
6098
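    /*
     * gfx_v9_0_query_ras_error_count - read the GFX EDC error counters
     *
     * Walks every shader engine and instance, selects it through
     * gfx_v9_0_select_se_sh(), and reads each counter described in
     * gfx_ras_edc_regs.  A non-zero SEC field bumps the correctable error
     * count (ce_count) and a non-zero DED field bumps the uncorrectable
     * error count (ue_count).  Only supported on Vega20.
     */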
6099static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6100                                          void *ras_error_status)
6101{
6102        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6103        uint32_t sec_count, ded_count;
6104        uint32_t i;
6105        uint32_t reg_value;
6106        uint32_t se_id, instance_id;
6107
6108        if (adev->asic_type != CHIP_VEGA20)
6109                return -EINVAL;
6110
6111        err_data->ue_count = 0;
6112        err_data->ce_count = 0;
6113
6114        mutex_lock(&adev->grbm_idx_mutex);
6115        for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6116                for (instance_id = 0; instance_id < 256; instance_id++) {
6117                        for (i = 0; i < ARRAY_SIZE(gfx_ras_edc_regs); i++) {
6120                                if (se_id != 0 &&
6121                                    !gfx_ras_edc_regs[i].per_se_instance)
6122                                        continue;
6123                                if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6124                                        continue;
6125
6126                                gfx_v9_0_select_se_sh(adev, se_id, 0,
6127                                                      instance_id);
6128
6129                                reg_value = RREG32(
6130                                        adev->reg_offset[gfx_ras_edc_regs[i].ip]
6131                                                        [gfx_ras_edc_regs[i].inst]
6132                                                        [gfx_ras_edc_regs[i].seg] +
6133                                        gfx_ras_edc_regs[i].reg_offset);
6134                                sec_count = reg_value &
6135                                            gfx_ras_edc_regs[i].sec_count_mask;
6136                                ded_count = reg_value &
6137                                            gfx_ras_edc_regs[i].ded_count_mask;
6138                                if (sec_count) {
6139                                        DRM_INFO(
6140                                                "Instance[%d][%d]: SubBlock %s, SEC %d\n",
6141                                                se_id, instance_id,
6142                                                gfx_ras_edc_regs[i].name,
6143                                                sec_count);
6144                                        err_data->ce_count++;
6145                                }
6146
6147                                if (ded_count) {
6148                                        DRM_INFO(
6149                                                "Instance[%d][%d]: SubBlock %s, DED %d\n",
6150                                                se_id, instance_id,
6151                                                gfx_ras_edc_regs[i].name,
6152                                                ded_count);
6153                                        err_data->ue_count++;
6154                                }
6155                        }
6156                }
6157        }
6158        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6159        mutex_unlock(&adev->grbm_idx_mutex);
6160
6161        return 0;
6162}
6163
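    /*
     * CP ECC error interrupt handler: hand the IH entry to the RAS core,
     * which dispatches it to the handler registered for adev->gfx.ras_if.
     */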
6164static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6165                                  struct amdgpu_irq_src *source,
6166                                  struct amdgpu_iv_entry *entry)
6167{
6168        struct ras_common_if *ras_if = adev->gfx.ras_if;
6169        struct ras_dispatch_if ih_data = {
6170                .entry = entry,
6171        };
6172
6173        if (!ras_if)
6174                return 0;
6175
6176        ih_data.head = *ras_if;
6177
6178        DRM_ERROR("CP ECC ERROR IRQ\n");
6179        amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6180        return 0;
6181}
6182
6183static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6184        .name = "gfx_v9_0",
6185        .early_init = gfx_v9_0_early_init,
6186        .late_init = gfx_v9_0_late_init,
6187        .sw_init = gfx_v9_0_sw_init,
6188        .sw_fini = gfx_v9_0_sw_fini,
6189        .hw_init = gfx_v9_0_hw_init,
6190        .hw_fini = gfx_v9_0_hw_fini,
6191        .suspend = gfx_v9_0_suspend,
6192        .resume = gfx_v9_0_resume,
6193        .is_idle = gfx_v9_0_is_idle,
6194        .wait_for_idle = gfx_v9_0_wait_for_idle,
6195        .soft_reset = gfx_v9_0_soft_reset,
6196        .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6197        .set_powergating_state = gfx_v9_0_set_powergating_state,
6198        .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6199};
6200
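    /*
     * The emit_frame_size and emit_ib_size values in the ring funcs below are
     * worst-case dword counts; the ring layer uses them to reserve enough
     * space on the ring before a frame or an IB is emitted.
     */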
6201static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6202        .type = AMDGPU_RING_TYPE_GFX,
6203        .align_mask = 0xff,
6204        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6205        .support_64bit_ptrs = true,
6206        .vmhub = AMDGPU_GFXHUB_0,
6207        .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6208        .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6209        .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6210        .emit_frame_size = /* 242 dwords maximum if 16 IBs */
6211                5 +  /* COND_EXEC */
6212                7 +  /* PIPELINE_SYNC */
6213                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6214                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6215                2 + /* VM_FLUSH */
6216                8 +  /* FENCE for VM_FLUSH */
6217                20 + /* GDS switch */
6218                4 + /* double SWITCH_BUFFER,
6219                       the first COND_EXEC jumps to the place just
6220                       prior to this double SWITCH_BUFFER */
6221                5 + /* COND_EXEC */
6222                7 + /* HDP_flush */
6223                4 + /* VGT_flush */
6224                14 + /* CE_META */
6225                31 + /* DE_META */
6226                3 + /* CNTX_CTRL */
6227                5 + /* HDP_INVL */
6228                8 + 8 + /* FENCE x2 */
6229                2, /* SWITCH_BUFFER */
6230        .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6231        .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6232        .emit_fence = gfx_v9_0_ring_emit_fence,
6233        .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6234        .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6235        .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6236        .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6237        .test_ring = gfx_v9_0_ring_test_ring,
6238        .test_ib = gfx_v9_0_ring_test_ib,
6239        .insert_nop = amdgpu_ring_insert_nop,
6240        .pad_ib = amdgpu_ring_generic_pad_ib,
6241        .emit_switch_buffer = gfx_v9_ring_emit_sb,
6242        .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6243        .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6244        .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6245        .emit_tmz = gfx_v9_0_ring_emit_tmz,
6246        .emit_wreg = gfx_v9_0_ring_emit_wreg,
6247        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6248        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6249        .soft_recovery = gfx_v9_0_ring_soft_recovery,
6250};
6251
6252static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6253        .type = AMDGPU_RING_TYPE_COMPUTE,
6254        .align_mask = 0xff,
6255        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6256        .support_64bit_ptrs = true,
6257        .vmhub = AMDGPU_GFXHUB_0,
6258        .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6259        .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6260        .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6261        .emit_frame_size =
6262                20 + /* gfx_v9_0_ring_emit_gds_switch */
6263                7 + /* gfx_v9_0_ring_emit_hdp_flush */
6264                5 + /* hdp invalidate */
6265                7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6266                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6267                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6268                2 + /* gfx_v9_0_ring_emit_vm_flush */
6269                8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6270        .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6271        .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6272        .emit_fence = gfx_v9_0_ring_emit_fence,
6273        .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6274        .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6275        .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6276        .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6277        .test_ring = gfx_v9_0_ring_test_ring,
6278        .test_ib = gfx_v9_0_ring_test_ib,
6279        .insert_nop = amdgpu_ring_insert_nop,
6280        .pad_ib = amdgpu_ring_generic_pad_ib,
6281        .set_priority = gfx_v9_0_ring_set_priority_compute,
6282        .emit_wreg = gfx_v9_0_ring_emit_wreg,
6283        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6284        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6285};
6286
6287static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6288        .type = AMDGPU_RING_TYPE_KIQ,
6289        .align_mask = 0xff,
6290        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6291        .support_64bit_ptrs = true,
6292        .vmhub = AMDGPU_GFXHUB_0,
6293        .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6294        .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6295        .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6296        .emit_frame_size =
6297                20 + /* gfx_v9_0_ring_emit_gds_switch */
6298                7 + /* gfx_v9_0_ring_emit_hdp_flush */
6299                5 + /* hdp invalidate */
6300                7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6301                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6302                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6303                2 + /* gfx_v9_0_ring_emit_vm_flush */
6304                8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6305        .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6306        .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6307        .test_ring = gfx_v9_0_ring_test_ring,
6308        .insert_nop = amdgpu_ring_insert_nop,
6309        .pad_ib = amdgpu_ring_generic_pad_ib,
6310        .emit_rreg = gfx_v9_0_ring_emit_rreg,
6311        .emit_wreg = gfx_v9_0_ring_emit_wreg,
6312        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6313        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6314};
6315
6316static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6317{
6318        int i;
6319
6320        adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6321
6322        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6323                adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6324
6325        for (i = 0; i < adev->gfx.num_compute_rings; i++)
6326                adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6327}
6328
6329static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6330        .set = gfx_v9_0_set_eop_interrupt_state,
6331        .process = gfx_v9_0_eop_irq,
6332};
6333
6334static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6335        .set = gfx_v9_0_set_priv_reg_fault_state,
6336        .process = gfx_v9_0_priv_reg_irq,
6337};
6338
6339static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6340        .set = gfx_v9_0_set_priv_inst_fault_state,
6341        .process = gfx_v9_0_priv_inst_irq,
6342};
6343
6344static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6345        .set = gfx_v9_0_set_cp_ecc_error_state,
6346        .process = gfx_v9_0_cp_ecc_error_irq,
6347};
6348
6349
6350static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6351{
6352        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6353        adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6354
6355        adev->gfx.priv_reg_irq.num_types = 1;
6356        adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6357
6358        adev->gfx.priv_inst_irq.num_types = 1;
6359        adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6360
6361        adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6362        adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6363}
6364
6365static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6366{
6367        switch (adev->asic_type) {
6368        case CHIP_VEGA10:
6369        case CHIP_VEGA12:
6370        case CHIP_VEGA20:
6371        case CHIP_RAVEN:
6372        case CHIP_ARCTURUS:
6373        case CHIP_RENOIR:
6374                adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6375                break;
6376        default:
6377                break;
6378        }
6379}
6380
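    /*
     * Per-ASIC GDS configuration: total GDS size, the chip-specific maximum
     * compute wave ID for GDS, and the fixed GWS/OA sizes set at the end.
     */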
6381static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6382{
6383        /* init asic gds info */
6384        switch (adev->asic_type) {
6385        case CHIP_VEGA10:
6386        case CHIP_VEGA12:
6387        case CHIP_VEGA20:
6388                adev->gds.gds_size = 0x10000;
6389                break;
6390        case CHIP_RAVEN:
6391        case CHIP_ARCTURUS:
6392                adev->gds.gds_size = 0x1000;
6393                break;
6394        default:
6395                adev->gds.gds_size = 0x10000;
6396                break;
6397        }
6398
6399        switch (adev->asic_type) {
6400        case CHIP_VEGA10:
6401        case CHIP_VEGA20:
6402                adev->gds.gds_compute_max_wave_id = 0x7ff;
6403                break;
6404        case CHIP_VEGA12:
6405                adev->gds.gds_compute_max_wave_id = 0x27f;
6406                break;
6407        case CHIP_RAVEN:
6408                if (adev->rev_id >= 0x8)
6409                        adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6410                else
6411                        adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6412                break;
6413        case CHIP_ARCTURUS:
6414                adev->gds.gds_compute_max_wave_id = 0xfff;
6415                break;
6416        default:
6417                /* this really depends on the chip */
6418                adev->gds.gds_compute_max_wave_id = 0x7ff;
6419                break;
6420        }
6421
6422        adev->gds.gws_size = 64;
6423        adev->gds.oa_size = 16;
6424}
6425
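    /*
     * Write the user-requested inactive-CU mask (parsed from the
     * amdgpu.disable_cu module option by amdgpu_gfx_parse_disable_cu()) into
     * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
     */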
6426static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6427                                                 u32 bitmap)
6428{
6429        u32 data;
6430
6431        if (!bitmap)
6432                return;
6433
6434        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6435        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6436
6437        WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6438}
6439
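    /*
     * Return the active-CU bitmap for the currently selected SE/SH: OR the
     * fused (CC_GC_SHADER_ARRAY_CONFIG) and user (GC_USER_SHADER_ARRAY_CONFIG)
     * inactive masks, invert, and limit the result to max_cu_per_sh bits.
     * For example, with max_cu_per_sh = 8 and a combined inactive mask of
     * 0x3, the returned bitmap is 0xfc.
     */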
6440static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6441{
6442        u32 data, mask;
6443
6444        data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6445        data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6446
6447        data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6448        data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6449
6450        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6451
6452        return (~data) & mask;
6453}
6454
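    /*
     * gfx_v9_0_get_cu_info - build the CU topology info for this chip
     *
     * For every SE/SH pair, apply the user CU-disable mask, read back the
     * active-CU bitmap, count the active CUs and derive the always-on (ao)
     * CU mask.  The results are returned through @cu_info.
     */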
6455static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6456                                 struct amdgpu_cu_info *cu_info)
6457{
6458        int i, j, k, counter, active_cu_number = 0;
6459        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6460        unsigned disable_masks[4 * 4];
6461
6462        if (!adev || !cu_info)
6463                return -EINVAL;
6464
6465        /*
6466         * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6467         */
6468        if (adev->gfx.config.max_shader_engines *
6469                adev->gfx.config.max_sh_per_se > 16)
6470                return -EINVAL;
6471
6472        amdgpu_gfx_parse_disable_cu(disable_masks,
6473                                    adev->gfx.config.max_shader_engines,
6474                                    adev->gfx.config.max_sh_per_se);
6475
6476        mutex_lock(&adev->grbm_idx_mutex);
6477        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6478                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6479                        mask = 1;
6480                        ao_bitmap = 0;
6481                        counter = 0;
6482                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6483                        gfx_v9_0_set_user_cu_inactive_bitmap(
6484                                adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6485                        bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6486
6487                        /*
6488                         * The bitmap (and ao_cu_bitmap) in the cu_info structure
6489                         * is a 4x4 array, which suits the Vega ASICs with their
6490                         * 4*2 SE/SH layout.
6491                         * Arcturus, however, uses an 8*1 SE/SH layout.
6492                         * To minimize the impact, remap it onto the existing
6493                         * bitmap array as below:
6494                         *    SE4,SH0 --> bitmap[0][1]
6495                         *    SE5,SH0 --> bitmap[1][1]
6496                         *    SE6,SH0 --> bitmap[2][1]
6497                         *    SE7,SH0 --> bitmap[3][1]
6498                         */
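                            /*
                             * For example, SE4/SH0 means i = 4 and j = 0, so
                             * i % 4 = 0 and j + i / 4 = 1, i.e. bitmap[0][1].
                             */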
6499                        cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6500
6501                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6502                                if (bitmap & mask) {
6503                                        if (counter < adev->gfx.config.max_cu_per_sh)
6504                                                ao_bitmap |= mask;
6505                                        counter++;
6506                                }
6507                                mask <<= 1;
6508                        }
6509                        active_cu_number += counter;
6510                        if (i < 2 && j < 2)
6511                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6512                        cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6513                }
6514        }
6515        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6516        mutex_unlock(&adev->grbm_idx_mutex);
6517
6518        cu_info->number = active_cu_number;
6519        cu_info->ao_cu_mask = ao_cu_mask;
6520        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6521
6522        return 0;
6523}
6524
6525const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6526{
6527        .type = AMD_IP_BLOCK_TYPE_GFX,
6528        .major = 9,
6529        .minor = 0,
6530        .rev = 0,
6531        .funcs = &gfx_v9_0_ip_funcs,
6532};
6533