linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "uvd/uvd_5_0_d.h"
#include "uvd/uvd_5_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
        mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);

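/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register settings
 *
 * The tables above are consumed three dwords at a time as {offset, mask,
 * value} triplets by amdgpu_program_register_sequence(), so each ASIC gets
 * its clock-gating defaults, golden settings and common config (e.g.
 * GB_ADDR_CONFIG) programmed at init time.
 */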
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_program_register_sequence(adev,
                                                 iceland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_iceland_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_program_register_sequence(adev,
                                                 iceland_golden_common_all,
                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_fiji_a10,
                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_program_register_sequence(adev,
                                                 fiji_golden_common_all,
                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_program_register_sequence(adev,
                                                 tonga_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_tonga_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_program_register_sequence(adev,
                                                 tonga_golden_common_all,
                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_CARRIZO:
                amdgpu_program_register_sequence(adev,
                                                 cz_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_common_all,
                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_common_all,
                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

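/*
 * gfx_v8_0_scratch_init - set up the CP scratch registers
 *
 * Marks seven scratch registers, starting at mmSCRATCH_REG0, as free so the
 * ring and IB tests below can borrow one for their write-and-poll handshake.
 */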
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        int i;

        adev->gfx.scratch.num_reg = 7;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
                adev->gfx.scratch.free[i] = true;
                adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
        }
}

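/*
 * gfx_v8_0_ring_test_ring - basic sanity check of a ring
 *
 * Seeds a scratch register with 0xCAFEDEAD, emits a SET_UCONFIG_REG packet
 * that writes 0xDEADBEEF to it, then polls until the value lands or
 * adev->usec_timeout expires.
 */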
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_lock(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_unlock_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n",
                         ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

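/*
 * gfx_v8_0_ring_test_ib - sanity check of indirect buffer execution
 *
 * Same scratch-register handshake as the ring test, but the register write
 * is carried in a small IB submitted through the scheduler and waited on via
 * its fence, so IB fetch and fence signalling are exercised as well.
 */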
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct fence *f = NULL;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(ring, NULL, 256, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;

        r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
                                                 AMDGPU_FENCE_OWNER_UNDEFINED,
                                                 &f);
        if (r)
                goto err2;

        r = fence_wait(f, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto err2;
        }
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
                         ring->idx, i);
                goto err2;
        } else {
                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
err2:
        fence_put(f);
        amdgpu_ib_free(adev, &ib);
err1:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

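/*
 * gfx_v8_0_init_microcode - fetch and validate the gfx firmware images
 *
 * Requests the PFP, ME, CE, RLC and MEC (plus MEC2 where present) firmware
 * for the detected ASIC, records the ucode/feature versions, and, when the
 * SMU is responsible for loading, registers each image in the ucode table so
 * its size is accounted for in adev->firmware.fw_size.
 */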
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                if (!err) {
                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                        if (err)
                                goto out;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                                adev->gfx.mec2_fw->data;
                        adev->gfx.mec2_fw_version =
                                le32_to_cpu(cp_hdr->header.ucode_version);
                        adev->gfx.mec2_feature_version =
                                le32_to_cpu(cp_hdr->ucode_feature_version);
                } else {
                        err = 0;
                        adev->gfx.mec2_fw = NULL;
                }
        }

        if (adev->firmware.smu_load) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                if (adev->gfx.mec2_fw) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                        info->fw = adev->gfx.mec2_fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
                }

        }

out:
        if (err) {
                dev_err(adev->dev,
                        "gfx8: Failed to load firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.ce_fw);
                adev->gfx.ce_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
                release_firmware(adev->gfx.mec2_fw);
                adev->gfx.mec2_fw = NULL;
        }
        return err;
}

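/* gfx_v8_0_mec_fini - unpin and free the MEC HPD EOP buffer object */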
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        int r;

        if (adev->gfx.mec.hpd_eop_obj) {
                r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
                if (unlikely(r != 0))
                        dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
                amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

                amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
                adev->gfx.mec.hpd_eop_obj = NULL;
        }
}

#define MEC_HPD_SIZE 2048

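/*
 * gfx_v8_0_mec_init - allocate and clear the MEC HPD EOP buffer
 *
 * Only one MEC with one pipe is managed by the driver (the rest are left to
 * the KFD), so a single GTT buffer of num_mec * num_pipe * MEC_HPD_SIZE * 2
 * bytes is created, pinned and zeroed for the eight compute queues.
 */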
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
        int r;
        u32 *hpd;

        /*
         * we assign only 1 pipe because all other pipes will
         * be handled by KFD
         */
        adev->gfx.mec.num_mec = 1;
        adev->gfx.mec.num_pipe = 1;
        adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

        if (adev->gfx.mec.hpd_eop_obj == NULL) {
                r = amdgpu_bo_create(adev,
                                     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
                                     PAGE_SIZE, true,
                                     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
                                     &adev->gfx.mec.hpd_eop_obj);
                if (r) {
                        dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
                        return r;
                }
        }

        r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
        if (unlikely(r != 0)) {
                gfx_v8_0_mec_fini(adev);
                return r;
        }
        r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
                          &adev->gfx.mec.hpd_eop_gpu_addr);
        if (r) {
                dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
                gfx_v8_0_mec_fini(adev);
                return r;
        }
        r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
        if (r) {
                dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
                gfx_v8_0_mec_fini(adev);
                return r;
        }

        memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

        return 0;
}

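/*
 * Raw GCN machine code used by the EDC GPR workaround below.  This blob and
 * the SGPR variant that follows are essentially long runs of v_mov_b32 /
 * s_mov register moves terminated by s_barrier and s_endpgm, so that a large
 * block of vector and scalar GPRs is written with known values.
 */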
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};

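/*
 * Dispatch state for the three workaround dispatches: flat {register, value}
 * pairs written via SET_SH_REG (thread group size, CU mask in
 * COMPUTE_STATIC_THREAD_MGMT_SE0, PGM_RSRC2, and the USER_DATA slots, which
 * presumably seed the initial SGPRs the shaders copy from).
 */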
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0,
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

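/* EDC (single-error-correct / double-error-detect) counter registers. */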
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};

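/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize the GPRs for EDC (Carrizo only)
 *
 * Builds a single IB containing three compute dispatches (one for the VGPRs,
 * two for the SGPRs split across CU groups 0x0f and 0xf0); each dispatch is
 * SET_SH_REG state, the shader address, a DISPATCH_DIRECT and a CS partial
 * flush.  The IB is then submitted and waited on.
 */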
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
        struct amdgpu_ib ib;
        struct fence *f = NULL;
        int r, i;
        u32 tmp;
        unsigned total_size, vgpr_offset, sgpr_offset;
        u64 gpu_addr;

        /* only supported on CZ */
        if (adev->asic_type != CHIP_CARRIZO)
                return 0;

        /* bail if the compute ring is not ready */
        if (!ring->ready)
                return 0;

        tmp = RREG32(mmGB_EDC_MODE);
        WREG32(mmGB_EDC_MODE, 0);

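        /*
         * IB size: each {reg, value} pair becomes a 3-dword SET_SH_REG packet,
         * plus 4 dwords for the COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT
         * and 2 for the CS partial flush, times 4 bytes per dword; the shader
         * blobs are appended at 256-byte-aligned offsets.
         */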
        total_size =
                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size = ALIGN(total_size, 256);
        vgpr_offset = total_size;
        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
        sgpr_offset = total_size;
        total_size += sizeof(sgpr_init_compute_shader);

        /* allocate an indirect buffer to put the commands in */
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(ring, NULL, total_size, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                return r;
        }

        /* load the compute shaders */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

        /* init the ib length to 0 */
        ib.length_dw = 0;

        /* VGPR */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR1 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR2 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

1268        /* schedule the ib on the ring */
1269        r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
1270                                                 AMDGPU_FENCE_OWNER_UNDEFINED,
1271                                                 &f);
1272        if (r) {
1273                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1274                goto fail;
1275        }
1276
1277        /* wait for the GPU to finish processing the IB */
1278        r = fence_wait(f, false);
1279        if (r) {
1280                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1281                goto fail;
1282        }
1283
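        /*
         * With the GPRs initialized, restore GB_EDC_MODE (zeroed at the top
         * of this function) with DED_MODE and PROP_FED set, update
         * CC_GC_EDC_CONFIG, and clear any stale counts by reading the
         * SEC/DED counter registers back.
         */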
1284        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1285        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1286        WREG32(mmGB_EDC_MODE, tmp);
1287
1288        tmp = RREG32(mmCC_GC_EDC_CONFIG);
1289        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1290        WREG32(mmCC_GC_EDC_CONFIG, tmp);
1291
1292
1293        /* read back registers to clear the counters */
1294        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1295                RREG32(sec_ded_counter_registers[i]);
1296
1297fail:
1298        fence_put(f);
1299        amdgpu_ib_free(adev, &ib);
1300
1301        return r;
1302}
1303
1304static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1305{
1306        u32 gb_addr_config;
1307        u32 mc_shared_chmap, mc_arb_ramcfg;
1308        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1309        u32 tmp;
1310
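        /*
         * Per-ASIC gfx limits.  For Carrizo the PCI revision id below
         * distinguishes the B10/B8/B6/B4 variants, and for Stoney it
         * selects between the 3-CU and 2-CU parts.
         */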
1311        switch (adev->asic_type) {
1312        case CHIP_TOPAZ:
1313                adev->gfx.config.max_shader_engines = 1;
1314                adev->gfx.config.max_tile_pipes = 2;
1315                adev->gfx.config.max_cu_per_sh = 6;
1316                adev->gfx.config.max_sh_per_se = 1;
1317                adev->gfx.config.max_backends_per_se = 2;
1318                adev->gfx.config.max_texture_channel_caches = 2;
1319                adev->gfx.config.max_gprs = 256;
1320                adev->gfx.config.max_gs_threads = 32;
1321                adev->gfx.config.max_hw_contexts = 8;
1322
1323                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1324                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1325                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1326                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1327                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1328                break;
1329        case CHIP_FIJI:
1330                adev->gfx.config.max_shader_engines = 4;
1331                adev->gfx.config.max_tile_pipes = 16;
1332                adev->gfx.config.max_cu_per_sh = 16;
1333                adev->gfx.config.max_sh_per_se = 1;
1334                adev->gfx.config.max_backends_per_se = 4;
1335                adev->gfx.config.max_texture_channel_caches = 16;
1336                adev->gfx.config.max_gprs = 256;
1337                adev->gfx.config.max_gs_threads = 32;
1338                adev->gfx.config.max_hw_contexts = 8;
1339
1340                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1344                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1345                break;
1346        case CHIP_TONGA:
1347                adev->gfx.config.max_shader_engines = 4;
1348                adev->gfx.config.max_tile_pipes = 8;
1349                adev->gfx.config.max_cu_per_sh = 8;
1350                adev->gfx.config.max_sh_per_se = 1;
1351                adev->gfx.config.max_backends_per_se = 2;
1352                adev->gfx.config.max_texture_channel_caches = 8;
1353                adev->gfx.config.max_gprs = 256;
1354                adev->gfx.config.max_gs_threads = 32;
1355                adev->gfx.config.max_hw_contexts = 8;
1356
1357                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1361                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1362                break;
1363        case CHIP_CARRIZO:
1364                adev->gfx.config.max_shader_engines = 1;
1365                adev->gfx.config.max_tile_pipes = 2;
1366                adev->gfx.config.max_sh_per_se = 1;
1367                adev->gfx.config.max_backends_per_se = 2;
1368
1369                switch (adev->pdev->revision) {
1370                case 0xc4:
1371                case 0x84:
1372                case 0xc8:
1373                case 0xcc:
1374                case 0xe1:
1375                case 0xe3:
1376                        /* B10 */
1377                        adev->gfx.config.max_cu_per_sh = 8;
1378                        break;
1379                case 0xc5:
1380                case 0x81:
1381                case 0x85:
1382                case 0xc9:
1383                case 0xcd:
1384                case 0xe2:
1385                case 0xe4:
1386                        /* B8 */
1387                        adev->gfx.config.max_cu_per_sh = 6;
1388                        break;
1389                case 0xc6:
1390                case 0xca:
1391                case 0xce:
1392                case 0x88:
1393                        /* B6 */
1394                        adev->gfx.config.max_cu_per_sh = 6;
1395                        break;
1396                case 0xc7:
1397                case 0x87:
1398                case 0xcb:
1399                case 0xe5:
1400                case 0x89:
1401                default:
1402                        /* B4 */
1403                        adev->gfx.config.max_cu_per_sh = 4;
1404                        break;
1405                }
1406
1407                adev->gfx.config.max_texture_channel_caches = 2;
1408                adev->gfx.config.max_gprs = 256;
1409                adev->gfx.config.max_gs_threads = 32;
1410                adev->gfx.config.max_hw_contexts = 8;
1411
1412                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1413                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1414                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1415                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1416                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1417                break;
1418        case CHIP_STONEY:
1419                adev->gfx.config.max_shader_engines = 1;
1420                adev->gfx.config.max_tile_pipes = 2;
1421                adev->gfx.config.max_sh_per_se = 1;
1422                adev->gfx.config.max_backends_per_se = 1;
1423
1424                switch (adev->pdev->revision) {
1425                case 0xc0:
1426                case 0xc1:
1427                case 0xc2:
1428                case 0xc4:
1429                case 0xc8:
1430                case 0xc9:
1431                        adev->gfx.config.max_cu_per_sh = 3;
1432                        break;
1433                case 0xd0:
1434                case 0xd1:
1435                case 0xd2:
1436                default:
1437                        adev->gfx.config.max_cu_per_sh = 2;
1438                        break;
1439                }
1440
1441                adev->gfx.config.max_texture_channel_caches = 2;
1442                adev->gfx.config.max_gprs = 256;
1443                adev->gfx.config.max_gs_threads = 16;
1444                adev->gfx.config.max_hw_contexts = 8;
1445
1446                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1447                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1448                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1449                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1450                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1451                break;
1452        default:
1453                adev->gfx.config.max_shader_engines = 2;
1454                adev->gfx.config.max_tile_pipes = 4;
1455                adev->gfx.config.max_cu_per_sh = 2;
1456                adev->gfx.config.max_sh_per_se = 1;
1457                adev->gfx.config.max_backends_per_se = 2;
1458                adev->gfx.config.max_texture_channel_caches = 4;
1459                adev->gfx.config.max_gprs = 256;
1460                adev->gfx.config.max_gs_threads = 32;
1461                adev->gfx.config.max_hw_contexts = 8;
1462
1463                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1464                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1465                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1466                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1467                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1468                break;
1469        }
1470
1471        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1472        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1473        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1474
1475        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1476        adev->gfx.config.mem_max_burst_length_bytes = 256;
1477        if (adev->flags & AMD_IS_APU) {
1478                /* Get memory bank mapping mode. */
1479                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1480                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1481                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1482
1483                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1484                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1485                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1486
1487                /* Validate settings in case only one DIMM is installed. */
1488                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1489                        dimm00_addr_map = 0;
1490                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1491                        dimm01_addr_map = 0;
1492                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1493                        dimm10_addr_map = 0;
1494                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1495                        dimm11_addr_map = 0;
1496
1497                /* If the DIMM address map is 8GB, the ROW size should be 2KB; otherwise 1KB. */
1498                /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger ROW size. */
1499                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1500                        adev->gfx.config.mem_row_size_in_kb = 2;
1501                else
1502                        adev->gfx.config.mem_row_size_in_kb = 1;
1503        } else {
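                /*
                 * dGPU: row size in KB is derived from the DRAM column
                 * count, 4 bytes * 2^(8 + NOOFCOLS), capped at 4 KB.
                 */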
1504                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1505                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1506                if (adev->gfx.config.mem_row_size_in_kb > 4)
1507                        adev->gfx.config.mem_row_size_in_kb = 4;
1508        }
1509
1510        adev->gfx.config.shader_engine_tile_size = 32;
1511        adev->gfx.config.num_gpus = 1;
1512        adev->gfx.config.multi_gpu_tile_size = 64;
1513
1514        /* fix up row size */
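        /* GB_ADDR_CONFIG.ROW_SIZE encodes 1 KB/2 KB/4 KB rows as 0/1/2. */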
1515        switch (adev->gfx.config.mem_row_size_in_kb) {
1516        case 1:
1517        default:
1518                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1519                break;
1520        case 2:
1521                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1522                break;
1523        case 4:
1524                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1525                break;
1526        }
1527        adev->gfx.config.gb_addr_config = gb_addr_config;
1528}
1529
1530static int gfx_v8_0_sw_init(void *handle)
1531{
1532        int i, r;
1533        struct amdgpu_ring *ring;
1534        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1535
1536        /* EOP Event */
1537        r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1538        if (r)
1539                return r;
1540
1541        /* Privileged reg */
1542        r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1543        if (r)
1544                return r;
1545
1546        /* Privileged inst */
1547        r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1548        if (r)
1549                return r;
1550
1551        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1552
1553        gfx_v8_0_scratch_init(adev);
1554
1555        r = gfx_v8_0_init_microcode(adev);
1556        if (r) {
1557                DRM_ERROR("Failed to load gfx firmware!\n");
1558                return r;
1559        }
1560
1561        r = gfx_v8_0_mec_init(adev);
1562        if (r) {
1563                DRM_ERROR("Failed to init MEC BOs!\n");
1564                return r;
1565        }
1566
1567        /* set up the gfx ring */
1568        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1569                ring = &adev->gfx.gfx_ring[i];
1570                ring->ring_obj = NULL;
1571                sprintf(ring->name, "gfx");
1572                /* no gfx doorbells on iceland */
1573                if (adev->asic_type != CHIP_TOPAZ) {
1574                        ring->use_doorbell = true;
1575                        ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1576                }
1577
1578                r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1579                                     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1580                                     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1581                                     AMDGPU_RING_TYPE_GFX);
1582                if (r)
1583                        return r;
1584        }
1585
1586        /* set up the compute queues */
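        /*
         * All of these rings are placed on MEC1; with pipe = i / 8 and
         * queue = i % 8 the first eight rings land on queues 0-7 of pipe 0.
         */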
1587        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1588                unsigned irq_type;
1589
1590                /* max 32 queues per MEC */
1591                if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1592                        DRM_ERROR("Too many (%d) compute rings!\n", i);
1593                        break;
1594                }
1595                ring = &adev->gfx.compute_ring[i];
1596                ring->ring_obj = NULL;
1597                ring->use_doorbell = true;
1598                ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1599                ring->me = 1; /* first MEC */
1600                ring->pipe = i / 8;
1601                ring->queue = i % 8;
1602                sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1603                irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1604                /* type-2 packets are deprecated on MEC, use type-3 instead */
1605                r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1606                                     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1607                                     &adev->gfx.eop_irq, irq_type,
1608                                     AMDGPU_RING_TYPE_COMPUTE);
1609                if (r)
1610                        return r;
1611        }
1612
1613        /* reserve GDS, GWS and OA resources for gfx */
1614        r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1615                        PAGE_SIZE, true,
1616                        AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1617                        NULL, &adev->gds.gds_gfx_bo);
1618        if (r)
1619                return r;
1620
1621        r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1622                PAGE_SIZE, true,
1623                AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1624                NULL, &adev->gds.gws_gfx_bo);
1625        if (r)
1626                return r;
1627
1628        r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1629                        PAGE_SIZE, true,
1630                        AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1631                        NULL, &adev->gds.oa_gfx_bo);
1632        if (r)
1633                return r;
1634
1635        adev->gfx.ce_ram_size = 0x8000; /* 32 KB of constant engine (CE) RAM */
1636
1637        gfx_v8_0_gpu_early_init(adev);
1638
1639        return 0;
1640}
1641
1642static int gfx_v8_0_sw_fini(void *handle)
1643{
1644        int i;
1645        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1646
1647        amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1648        amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1649        amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1650
1651        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1652                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1653        for (i = 0; i < adev->gfx.num_compute_rings; i++)
1654                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1655
1656        gfx_v8_0_mec_fini(adev);
1657
1658        return 0;
1659}
1660
1661static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1662{
1663        uint32_t *modearray, *mod2array;
1664        const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1665        const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1666        u32 reg_offset;
1667
1668        modearray = adev->gfx.config.tile_mode_array;
1669        mod2array = adev->gfx.config.macrotile_mode_array;
1670
1671        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1672                modearray[reg_offset] = 0;
1673
1674        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1675                mod2array[reg_offset] = 0;
1676
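        /*
         * Fill in the per-ASIC GB_TILE_MODE* and GB_MACROTILE_MODE* values
         * and program them.  Indices that are not assigned below stay zero,
         * and some (e.g. tile modes 7/12/17/23 and macrotile mode 7 on
         * Topaz) are also skipped when the registers are written.
         */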
1677        switch (adev->asic_type) {
1678        case CHIP_TOPAZ:
1679                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680                                PIPE_CONFIG(ADDR_SURF_P2) |
1681                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1682                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684                                PIPE_CONFIG(ADDR_SURF_P2) |
1685                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1686                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688                                PIPE_CONFIG(ADDR_SURF_P2) |
1689                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1690                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692                                PIPE_CONFIG(ADDR_SURF_P2) |
1693                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1694                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1696                                PIPE_CONFIG(ADDR_SURF_P2) |
1697                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1700                                PIPE_CONFIG(ADDR_SURF_P2) |
1701                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1704                                PIPE_CONFIG(ADDR_SURF_P2) |
1705                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1706                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1707                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1708                                PIPE_CONFIG(ADDR_SURF_P2));
1709                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1710                                PIPE_CONFIG(ADDR_SURF_P2) |
1711                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1714                                 PIPE_CONFIG(ADDR_SURF_P2) |
1715                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1717                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1718                                 PIPE_CONFIG(ADDR_SURF_P2) |
1719                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1720                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1721                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1722                                 PIPE_CONFIG(ADDR_SURF_P2) |
1723                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1726                                 PIPE_CONFIG(ADDR_SURF_P2) |
1727                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1730                                 PIPE_CONFIG(ADDR_SURF_P2) |
1731                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1733                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1734                                 PIPE_CONFIG(ADDR_SURF_P2) |
1735                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1737                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738                                 PIPE_CONFIG(ADDR_SURF_P2) |
1739                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1740                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1742                                 PIPE_CONFIG(ADDR_SURF_P2) |
1743                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1746                                 PIPE_CONFIG(ADDR_SURF_P2) |
1747                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1750                                 PIPE_CONFIG(ADDR_SURF_P2) |
1751                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1754                                 PIPE_CONFIG(ADDR_SURF_P2) |
1755                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1756                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1758                                 PIPE_CONFIG(ADDR_SURF_P2) |
1759                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1760                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1762                                 PIPE_CONFIG(ADDR_SURF_P2) |
1763                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1766                                 PIPE_CONFIG(ADDR_SURF_P2) |
1767                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1768                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1769                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1770                                 PIPE_CONFIG(ADDR_SURF_P2) |
1771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774                                 PIPE_CONFIG(ADDR_SURF_P2) |
1775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1777                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1778                                 PIPE_CONFIG(ADDR_SURF_P2) |
1779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1780                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1781
1782                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785                                NUM_BANKS(ADDR_SURF_8_BANK));
1786                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1787                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789                                NUM_BANKS(ADDR_SURF_8_BANK));
1790                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1791                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1793                                NUM_BANKS(ADDR_SURF_8_BANK));
1794                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1797                                NUM_BANKS(ADDR_SURF_8_BANK));
1798                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1800                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801                                NUM_BANKS(ADDR_SURF_8_BANK));
1802                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805                                NUM_BANKS(ADDR_SURF_8_BANK));
1806                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1807                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1808                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1809                                NUM_BANKS(ADDR_SURF_8_BANK));
1810                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1812                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813                                NUM_BANKS(ADDR_SURF_16_BANK));
1814                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1815                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817                                NUM_BANKS(ADDR_SURF_16_BANK));
1818                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1820                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821                                 NUM_BANKS(ADDR_SURF_16_BANK));
1822                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825                                 NUM_BANKS(ADDR_SURF_16_BANK));
1826                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1828                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829                                 NUM_BANKS(ADDR_SURF_16_BANK));
1830                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1833                                 NUM_BANKS(ADDR_SURF_16_BANK));
1834                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1837                                 NUM_BANKS(ADDR_SURF_8_BANK));
1838
1839                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1840                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1841                            reg_offset != 23)
1842                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1843
1844                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1845                        if (reg_offset != 7)
1846                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1847
1848                break;
1849        case CHIP_FIJI:
1850                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1857                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1861                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1865                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1867                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1871                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1874                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1876                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1879                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1880                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1881                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1882                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1883                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1884                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1885                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1889                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1892                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1897                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1900                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1901                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1905                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1909                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1912                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1917                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1920                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1925                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1929                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1933                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1936                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1941                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1945                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1949                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1953                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1956                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1957                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1961                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1964                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1969                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1971                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1972
1973                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976                                NUM_BANKS(ADDR_SURF_8_BANK));
1977                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980                                NUM_BANKS(ADDR_SURF_8_BANK));
1981                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984                                NUM_BANKS(ADDR_SURF_8_BANK));
1985                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1987                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1988                                NUM_BANKS(ADDR_SURF_8_BANK));
1989                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1991                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992                                NUM_BANKS(ADDR_SURF_8_BANK));
1993                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996                                NUM_BANKS(ADDR_SURF_8_BANK));
1997                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1999                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2000                                NUM_BANKS(ADDR_SURF_8_BANK));
2001                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2003                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004                                NUM_BANKS(ADDR_SURF_8_BANK));
2005                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2007                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2008                                NUM_BANKS(ADDR_SURF_8_BANK));
2009                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2011                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012                                 NUM_BANKS(ADDR_SURF_8_BANK));
2013                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2015                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2016                                 NUM_BANKS(ADDR_SURF_8_BANK));
2017                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2019                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020                                 NUM_BANKS(ADDR_SURF_8_BANK));
2021                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2024                                 NUM_BANKS(ADDR_SURF_8_BANK));
2025                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2027                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2028                                 NUM_BANKS(ADDR_SURF_4_BANK));
2029
2030                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2031                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2032
2033                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2034                        if (reg_offset != 7)
2035                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2036
2037                break;
2038        case CHIP_TONGA:
2039                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2042                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2046                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2050                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2054                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2060                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2063                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2065                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2070                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2071                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2072                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2073                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2086                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2088                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2089                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2090                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2094                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2098                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2101                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2106                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2109                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2112                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2114                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2118                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2122                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2125                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2127                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2130                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2131                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2132                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2134                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2136                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2138                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2142                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2144                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2145                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2146                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2160                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2161
2162                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165                                NUM_BANKS(ADDR_SURF_16_BANK));
2166                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169                                NUM_BANKS(ADDR_SURF_16_BANK));
2170                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173                                NUM_BANKS(ADDR_SURF_16_BANK));
2174                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2176                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2177                                NUM_BANKS(ADDR_SURF_16_BANK));
2178                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2180                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2181                                NUM_BANKS(ADDR_SURF_16_BANK));
2182                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185                                NUM_BANKS(ADDR_SURF_16_BANK));
2186                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2189                                NUM_BANKS(ADDR_SURF_16_BANK));
2190                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2192                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193                                NUM_BANKS(ADDR_SURF_16_BANK));
2194                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2196                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2197                                NUM_BANKS(ADDR_SURF_16_BANK));
2198                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2200                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201                                 NUM_BANKS(ADDR_SURF_16_BANK));
2202                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2205                                 NUM_BANKS(ADDR_SURF_16_BANK));
2206                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209                                 NUM_BANKS(ADDR_SURF_8_BANK));
2210                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213                                 NUM_BANKS(ADDR_SURF_4_BANK));
2214                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2215                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2216                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2217                                 NUM_BANKS(ADDR_SURF_4_BANK));
2218
2219                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2220                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2221
2222                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2223                        if (reg_offset != 7)
2224                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2225
2226                break;
2227        case CHIP_STONEY:
2228                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229                                PIPE_CONFIG(ADDR_SURF_P2) |
2230                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2231                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233                                PIPE_CONFIG(ADDR_SURF_P2) |
2234                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2235                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                PIPE_CONFIG(ADDR_SURF_P2) |
2238                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2239                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241                                PIPE_CONFIG(ADDR_SURF_P2) |
2242                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2243                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245                                PIPE_CONFIG(ADDR_SURF_P2) |
2246                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                PIPE_CONFIG(ADDR_SURF_P2) |
2250                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2253                                PIPE_CONFIG(ADDR_SURF_P2) |
2254                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2255                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2257                                PIPE_CONFIG(ADDR_SURF_P2));
2258                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2259                                PIPE_CONFIG(ADDR_SURF_P2) |
2260                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263                                 PIPE_CONFIG(ADDR_SURF_P2) |
2264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267                                 PIPE_CONFIG(ADDR_SURF_P2) |
2268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2269                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2271                                 PIPE_CONFIG(ADDR_SURF_P2) |
2272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275                                 PIPE_CONFIG(ADDR_SURF_P2) |
2276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2279                                 PIPE_CONFIG(ADDR_SURF_P2) |
2280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283                                 PIPE_CONFIG(ADDR_SURF_P2) |
2284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287                                 PIPE_CONFIG(ADDR_SURF_P2) |
2288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291                                 PIPE_CONFIG(ADDR_SURF_P2) |
2292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295                                 PIPE_CONFIG(ADDR_SURF_P2) |
2296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2299                                 PIPE_CONFIG(ADDR_SURF_P2) |
2300                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2303                                 PIPE_CONFIG(ADDR_SURF_P2) |
2304                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2307                                 PIPE_CONFIG(ADDR_SURF_P2) |
2308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2311                                 PIPE_CONFIG(ADDR_SURF_P2) |
2312                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2315                                 PIPE_CONFIG(ADDR_SURF_P2) |
2316                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P2) |
2320                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                 PIPE_CONFIG(ADDR_SURF_P2) |
2324                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P2) |
2328                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330
2331                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334                                NUM_BANKS(ADDR_SURF_8_BANK));
2335                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2337                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2338                                NUM_BANKS(ADDR_SURF_8_BANK));
2339                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342                                NUM_BANKS(ADDR_SURF_8_BANK));
2343                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346                                NUM_BANKS(ADDR_SURF_8_BANK));
2347                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350                                NUM_BANKS(ADDR_SURF_8_BANK));
2351                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354                                NUM_BANKS(ADDR_SURF_8_BANK));
2355                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                NUM_BANKS(ADDR_SURF_8_BANK));
2359                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2361                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362                                NUM_BANKS(ADDR_SURF_16_BANK));
2363                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366                                NUM_BANKS(ADDR_SURF_16_BANK));
2367                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370                                 NUM_BANKS(ADDR_SURF_16_BANK));
2371                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2372                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374                                 NUM_BANKS(ADDR_SURF_16_BANK));
2375                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378                                 NUM_BANKS(ADDR_SURF_16_BANK));
2379                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382                                 NUM_BANKS(ADDR_SURF_16_BANK));
2383                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386                                 NUM_BANKS(ADDR_SURF_8_BANK));
2387
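		/* tile-mode entries 7, 12, 17 and 23 are skipped below and left unprogrammed */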
2388                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2389                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2390                            reg_offset != 23)
2391                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2392
2393                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2394                        if (reg_offset != 7)
2395                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2396
2397                break;
2398        default:
2399                dev_warn(adev->dev,
2400                         "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2401                         adev->asic_type);
2402
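	/* fall through - unknown chips use the CHIP_CARRIZO tables */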
2403        case CHIP_CARRIZO:
2404                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405                                PIPE_CONFIG(ADDR_SURF_P2) |
2406                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2407                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                PIPE_CONFIG(ADDR_SURF_P2) |
2410                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2411                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                PIPE_CONFIG(ADDR_SURF_P2) |
2414                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2415                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                PIPE_CONFIG(ADDR_SURF_P2) |
2418                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2419                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421                                PIPE_CONFIG(ADDR_SURF_P2) |
2422                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425                                PIPE_CONFIG(ADDR_SURF_P2) |
2426                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429                                PIPE_CONFIG(ADDR_SURF_P2) |
2430                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433                                PIPE_CONFIG(ADDR_SURF_P2));
2434                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435                                PIPE_CONFIG(ADDR_SURF_P2) |
2436                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                 PIPE_CONFIG(ADDR_SURF_P2) |
2440                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443                                 PIPE_CONFIG(ADDR_SURF_P2) |
2444                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447                                 PIPE_CONFIG(ADDR_SURF_P2) |
2448                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451                                 PIPE_CONFIG(ADDR_SURF_P2) |
2452                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2455                                 PIPE_CONFIG(ADDR_SURF_P2) |
2456                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459                                 PIPE_CONFIG(ADDR_SURF_P2) |
2460                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463                                 PIPE_CONFIG(ADDR_SURF_P2) |
2464                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2467                                 PIPE_CONFIG(ADDR_SURF_P2) |
2468                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2471                                 PIPE_CONFIG(ADDR_SURF_P2) |
2472                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2475                                 PIPE_CONFIG(ADDR_SURF_P2) |
2476                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2479                                 PIPE_CONFIG(ADDR_SURF_P2) |
2480                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2483                                 PIPE_CONFIG(ADDR_SURF_P2) |
2484                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2487                                 PIPE_CONFIG(ADDR_SURF_P2) |
2488                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2491                                 PIPE_CONFIG(ADDR_SURF_P2) |
2492                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2495                                 PIPE_CONFIG(ADDR_SURF_P2) |
2496                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499                                 PIPE_CONFIG(ADDR_SURF_P2) |
2500                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503                                 PIPE_CONFIG(ADDR_SURF_P2) |
2504                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2505                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2506
2507                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510                                NUM_BANKS(ADDR_SURF_8_BANK));
2511                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514                                NUM_BANKS(ADDR_SURF_8_BANK));
2515                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518                                NUM_BANKS(ADDR_SURF_8_BANK));
2519                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522                                NUM_BANKS(ADDR_SURF_8_BANK));
2523                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526                                NUM_BANKS(ADDR_SURF_8_BANK));
2527                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530                                NUM_BANKS(ADDR_SURF_8_BANK));
2531                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2534                                NUM_BANKS(ADDR_SURF_8_BANK));
2535                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2537                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538                                NUM_BANKS(ADDR_SURF_16_BANK));
2539                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2540                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542                                NUM_BANKS(ADDR_SURF_16_BANK));
2543                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546                                 NUM_BANKS(ADDR_SURF_16_BANK));
2547                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2548                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550                                 NUM_BANKS(ADDR_SURF_16_BANK));
2551                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554                                 NUM_BANKS(ADDR_SURF_16_BANK));
2555                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558                                 NUM_BANKS(ADDR_SURF_16_BANK));
2559                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562                                 NUM_BANKS(ADDR_SURF_8_BANK));
2563
2564                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2565                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2566                            reg_offset != 23)
2567                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2568
2569                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2570                        if (reg_offset != 7)
2571                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2572
2573                break;
2574        }
2575}
2576
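/*
 * Build a mask with the @bit_width low bits set, e.g. bit_width = 4
 * yields 0xf.  Used below to mask per-SE/SH render-backend bitmaps.
 */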
2577static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2578{
2579        return (u32)((1ULL << bit_width) - 1);
2580}
2581
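/*
 * Point GRBM_GFX_INDEX at a specific shader engine (se_num) and shader
 * array (sh_num) so that subsequent indexed register accesses target that
 * instance; passing 0xffffffff for either argument selects broadcast mode.
 */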
2582void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2583{
2584        u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2585
2586        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2587                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2588                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2589        } else if (se_num == 0xffffffff) {
2590                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2591                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2592        } else if (sh_num == 0xffffffff) {
2593                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2594                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2595        } else {
2596                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2597                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2598        }
2599        WREG32(mmGRBM_GFX_INDEX, data);
2600}
2601
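/*
 * Return the bitmap of render backends unavailable on the currently
 * selected SE/SH: CC_RB_BACKEND_DISABLE reflects the fused-off (harvested)
 * backends, GC_USER_RB_BACKEND_DISABLE any additional driver/user disables.
 */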
2602static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
2603                                    u32 max_rb_num_per_se,
2604                                    u32 sh_per_se)
2605{
2606        u32 data, mask;
2607
2608        data = RREG32(mmCC_RB_BACKEND_DISABLE);
2609        data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2610
2611        data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2612
2613        data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2614
2615        mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
2616
2617        return data & mask;
2618}
2619
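/*
 * Walk every SE/SH, collect the disabled render-backend bitmaps into a
 * global enabled-RB mask (cached in gfx.config.backend_enable_mask), and
 * program PA_SC_RASTER_CONFIG for each shader engine accordingly.
 */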
2620static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
2621                              u32 se_num, u32 sh_per_se,
2622                              u32 max_rb_num_per_se)
2623{
2624        int i, j;
2625        u32 data, mask;
2626        u32 disabled_rbs = 0;
2627        u32 enabled_rbs = 0;
2628
2629        mutex_lock(&adev->grbm_idx_mutex);
2630        for (i = 0; i < se_num; i++) {
2631                for (j = 0; j < sh_per_se; j++) {
2632                        gfx_v8_0_select_se_sh(adev, i, j);
2633                        data = gfx_v8_0_get_rb_disabled(adev,
2634                                              max_rb_num_per_se, sh_per_se);
2635                        disabled_rbs |= data << ((i * sh_per_se + j) *
2636                                                 RB_BITMAP_WIDTH_PER_SH);
2637                }
2638        }
2639        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2640        mutex_unlock(&adev->grbm_idx_mutex);
2641
2642        mask = 1;
2643        for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2644                if (!(disabled_rbs & mask))
2645                        enabled_rbs |= mask;
2646                mask <<= 1;
2647        }
2648
2649        adev->gfx.config.backend_enable_mask = enabled_rbs;
2650
2651        mutex_lock(&adev->grbm_idx_mutex);
2652        for (i = 0; i < se_num; i++) {
2653                gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
2654                data = RREG32(mmPA_SC_RASTER_CONFIG);
2655                for (j = 0; j < sh_per_se; j++) {
2656                        switch (enabled_rbs & 3) {
2657                        case 0:
2658                                if (j == 0)
2659                                        data |= (RASTER_CONFIG_RB_MAP_3 <<
2660                                                 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2661                                else
2662                                        data |= (RASTER_CONFIG_RB_MAP_0 <<
2663                                                 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2664                                break;
2665                        case 1:
2666                                data |= (RASTER_CONFIG_RB_MAP_0 <<
2667                                         (i * sh_per_se + j) * 2);
2668                                break;
2669                        case 2:
2670                                data |= (RASTER_CONFIG_RB_MAP_3 <<
2671                                         (i * sh_per_se + j) * 2);
2672                                break;
2673                        case 3:
2674                        default:
2675                                data |= (RASTER_CONFIG_RB_MAP_2 <<
2676                                         (i * sh_per_se + j) * 2);
2677                                break;
2678                        }
2679                        enabled_rbs >>= 2;
2680                }
2681                WREG32(mmPA_SC_RASTER_CONFIG, data);
2682        }
2683        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2684        mutex_unlock(&adev->grbm_idx_mutex);
2685}
2686
2687/**
2688 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
2689 *
2690 * @adev: amdgpu_device pointer
2691 *
2692 * Initialize compute vmid sh_mem registers
2693 *
2694 */
2695#define DEFAULT_SH_MEM_BASES    (0x6000)
2696#define FIRST_COMPUTE_VMID      (8)
2697#define LAST_COMPUTE_VMID       (16)
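/* VMIDs 8..15 are set aside for compute; LAST_COMPUTE_VMID is exclusive */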
2698static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2699{
2700        int i;
2701        uint32_t sh_mem_config;
2702        uint32_t sh_mem_bases;
2703
2704        /*
2705         * Configure apertures:
2706         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2707         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2708         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2709         */
2710        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2711
2712        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2713                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2714                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2715                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2716                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2717                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2718
2719        mutex_lock(&adev->srbm_mutex);
2720        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2721                vi_srbm_select(adev, 0, 0, 0, i);
2722                /* CP and shaders */
2723                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2724                WREG32(mmSH_MEM_APE1_BASE, 1);
2725                WREG32(mmSH_MEM_APE1_LIMIT, 0);
2726                WREG32(mmSH_MEM_BASES, sh_mem_bases);
2727        }
2728        vi_srbm_select(adev, 0, 0, 0, 0);
2729        mutex_unlock(&adev->srbm_mutex);
2730}
2731
2732static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2733{
2734        u32 tmp;
2735        int i;
2736
2737        tmp = RREG32(mmGRBM_CNTL);
2738        tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2739        WREG32(mmGRBM_CNTL, tmp);
2740
2741        WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2742        WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2743        WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2744        WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
2745               adev->gfx.config.gb_addr_config & 0x70);
2746        WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
2747               adev->gfx.config.gb_addr_config & 0x70);
2748        WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2749        WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2750        WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2751
2752        gfx_v8_0_tiling_mode_table_init(adev);
2753
2754        gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2755                                 adev->gfx.config.max_sh_per_se,
2756                                 adev->gfx.config.max_backends_per_se);
2757
2758        /* XXX SH_MEM regs */
2759        /* where to put LDS, scratch, GPUVM in FSA64 space */
2760        mutex_lock(&adev->srbm_mutex);
2761        for (i = 0; i < 16; i++) {
2762                vi_srbm_select(adev, 0, 0, 0, i);
2763                /* CP and shaders */
2764                if (i == 0) {
2765                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2766                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2767                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2768                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2769                        WREG32(mmSH_MEM_CONFIG, tmp);
2770                } else {
2771                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2772                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2773                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2774                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2775                        WREG32(mmSH_MEM_CONFIG, tmp);
2776                }
2777
2778                WREG32(mmSH_MEM_APE1_BASE, 1);
2779                WREG32(mmSH_MEM_APE1_LIMIT, 0);
2780                WREG32(mmSH_MEM_BASES, 0);
2781        }
2782        vi_srbm_select(adev, 0, 0, 0, 0);
2783        mutex_unlock(&adev->srbm_mutex);
2784
2785        gfx_v8_0_init_compute_vmid(adev);
2786
2787        mutex_lock(&adev->grbm_idx_mutex);
2788        /*
2789         * make sure that the following register writes are broadcast
2790         * to all the shader engines and arrays
2791         */
2792        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2793
2794        WREG32(mmPA_SC_FIFO_SIZE,
2795                   (adev->gfx.config.sc_prim_fifo_size_frontend <<
2796                        PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2797                   (adev->gfx.config.sc_prim_fifo_size_backend <<
2798                        PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2799                   (adev->gfx.config.sc_hiz_tile_fifo_size <<
2800                        PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2801                   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2802                        PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2803        mutex_unlock(&adev->grbm_idx_mutex);
2804
2805}
2806
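/*
 * Poll, per SE/SH, until the RLC serdes CU masters report idle, then wait
 * for the non-CU masters (SE/GC/TC0/TC1); each wait is bounded by
 * adev->usec_timeout microseconds.
 */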
2807static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2808{
2809        u32 i, j, k;
2810        u32 mask;
2811
2812        mutex_lock(&adev->grbm_idx_mutex);
2813        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2814                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2815                        gfx_v8_0_select_se_sh(adev, i, j);
2816                        for (k = 0; k < adev->usec_timeout; k++) {
2817                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2818                                        break;
2819                                udelay(1);
2820                        }
2821                }
2822        }
2823        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2824        mutex_unlock(&adev->grbm_idx_mutex);
2825
2826        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2827                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2828                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2829                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2830        for (k = 0; k < adev->usec_timeout; k++) {
2831                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2832                        break;
2833                udelay(1);
2834        }
2835}
2836
2837static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2838                                               bool enable)
2839{
2840        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2841
2842        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2843        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2844        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2845        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2846
2847        WREG32(mmCP_INT_CNTL_RING0, tmp);
2848}
2849
2850void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2851{
2852        u32 tmp = RREG32(mmRLC_CNTL);
2853
2854        tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2855        WREG32(mmRLC_CNTL, tmp);
2856
2857        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2858
2859        gfx_v8_0_wait_for_rlc_serdes(adev);
2860}
2861
2862static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2863{
2864        u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2865
2866        tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2867        WREG32(mmGRBM_SOFT_RESET, tmp);
2868        udelay(50);
2869        tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2870        WREG32(mmGRBM_SOFT_RESET, tmp);
2871        udelay(50);
2872}
2873
2874static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2875{
2876        u32 tmp = RREG32(mmRLC_CNTL);
2877
2878        tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2879        WREG32(mmRLC_CNTL, tmp);
2880
2881        /* on APUs (e.g. carrizo), enable the CP interrupt only after the CP is initialized */
2882        if (!(adev->flags & AMD_IS_APU))
2883                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2884
2885        udelay(50);
2886}
2887
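/*
 * Legacy (non-SMU) RLC microcode load: reset the GPM ucode write address,
 * stream the firmware dwords through RLC_GPM_UCODE_DATA, then leave the
 * firmware version in RLC_GPM_UCODE_ADDR.
 */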
2888static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2889{
2890        const struct rlc_firmware_header_v2_0 *hdr;
2891        const __le32 *fw_data;
2892        unsigned i, fw_size;
2893
2894        if (!adev->gfx.rlc_fw)
2895                return -EINVAL;
2896
2897        hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2898        amdgpu_ucode_print_rlc_hdr(&hdr->header);
2899
2900        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2901                           le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2902        fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2903
2904        WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2905        for (i = 0; i < fw_size; i++)
2906                WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2907        WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2908
2909        return 0;
2910}
2911
2912static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2913{
2914        int r;
2915
2916        gfx_v8_0_rlc_stop(adev);
2917
2918        /* disable CG */
2919        WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2920
2921        /* disable PG */
2922        WREG32(mmRLC_PG_CNTL, 0);
2923
2924        gfx_v8_0_rlc_reset(adev);
2925
2926        if (!adev->pp_enabled) {
2927                if (!adev->firmware.smu_load) {
2928                        /* legacy rlc firmware loading */
2929                        r = gfx_v8_0_rlc_load_microcode(adev);
2930                        if (r)
2931                                return r;
2932                } else {
2933                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2934                                                        AMDGPU_UCODE_ID_RLC_G);
2935                        if (r)
2936                                return -EINVAL;
2937                }
2938        }
2939
2940        gfx_v8_0_rlc_start(adev);
2941
2942        return 0;
2943}
2944
2945static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2946{
2947        int i;
2948        u32 tmp = RREG32(mmCP_ME_CNTL);
2949
2950        if (enable) {
2951                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2952                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2953                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2954        } else {
2955                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2956                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2957                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2958                for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2959                        adev->gfx.gfx_ring[i].ready = false;
2960        }
2961        WREG32(mmCP_ME_CNTL, tmp);
2962        udelay(50);
2963}
2964
2965static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2966{
2967        const struct gfx_firmware_header_v1_0 *pfp_hdr;
2968        const struct gfx_firmware_header_v1_0 *ce_hdr;
2969        const struct gfx_firmware_header_v1_0 *me_hdr;
2970        const __le32 *fw_data;
2971        unsigned i, fw_size;
2972
2973        if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2974                return -EINVAL;
2975
2976        pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2977                adev->gfx.pfp_fw->data;
2978        ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2979                adev->gfx.ce_fw->data;
2980        me_hdr = (const struct gfx_firmware_header_v1_0 *)
2981                adev->gfx.me_fw->data;
2982
2983        amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2984        amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2985        amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2986
2987        gfx_v8_0_cp_gfx_enable(adev, false);
2988
2989        /* PFP */
2990        fw_data = (const __le32 *)
2991                (adev->gfx.pfp_fw->data +
2992                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2993        fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2994        WREG32(mmCP_PFP_UCODE_ADDR, 0);
2995        for (i = 0; i < fw_size; i++)
2996                WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2997        WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2998
2999        /* CE */
3000        fw_data = (const __le32 *)
3001                (adev->gfx.ce_fw->data +
3002                 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3003        fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3004        WREG32(mmCP_CE_UCODE_ADDR, 0);
3005        for (i = 0; i < fw_size; i++)
3006                WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3007        WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3008
3009        /* ME */
3010        fw_data = (const __le32 *)
3011                (adev->gfx.me_fw->data +
3012                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3013        fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3014        WREG32(mmCP_ME_RAM_WADDR, 0);
3015        for (i = 0; i < fw_size; i++)
3016                WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3017        WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3018
3019        return 0;
3020}
3021
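/*
 * Number of dwords in the clear-state stream emitted by
 * gfx_v8_0_cp_gfx_start(): preamble, context control, one SET_CONTEXT_REG
 * packet per extent, the raster-config pair, end-of-clear-state and the
 * final CLEAR_STATE packet.
 */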
3022static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3023{
3024        u32 count = 0;
3025        const struct cs_section_def *sect = NULL;
3026        const struct cs_extent_def *ext = NULL;
3027
3028        /* begin clear state */
3029        count += 2;
3030        /* context control state */
3031        count += 3;
3032
3033        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3034                for (ext = sect->section; ext->extent != NULL; ++ext) {
3035                        if (sect->id == SECT_CONTEXT)
3036                                count += 2 + ext->reg_count;
3037                        else
3038                                return 0;
3039                }
3040        }
3041        /* pa_sc_raster_config/pa_sc_raster_config1 */
3042        count += 4;
3043        /* end clear state */
3044        count += 2;
3045        /* clear state */
3046        count += 2;
3047
3048        return count;
3049}
3050
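/*
 * Bring up the gfx CP: program the basic CP registers, then submit the
 * golden context on ring 0 (clear state, the vi_cs_data register extents,
 * per-ASIC PA_SC_RASTER_CONFIG values) and set the CE partition bases.
 */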
3051static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3052{
3053        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3054        const struct cs_section_def *sect = NULL;
3055        const struct cs_extent_def *ext = NULL;
3056        int r, i;
3057
3058        /* init the CP */
3059        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3060        WREG32(mmCP_ENDIAN_SWAP, 0);
3061        WREG32(mmCP_DEVICE_ID, 1);
3062
3063        gfx_v8_0_cp_gfx_enable(adev, true);
3064
3065        r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
3066        if (r) {
3067                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3068                return r;
3069        }
3070
3071        /* clear state buffer */
3072        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3073        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3074
3075        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3076        amdgpu_ring_write(ring, 0x80000000);
3077        amdgpu_ring_write(ring, 0x80000000);
3078
3079        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3080                for (ext = sect->section; ext->extent != NULL; ++ext) {
3081                        if (sect->id == SECT_CONTEXT) {
3082                                amdgpu_ring_write(ring,
3083                                       PACKET3(PACKET3_SET_CONTEXT_REG,
3084                                               ext->reg_count));
3085                                amdgpu_ring_write(ring,
3086                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3087                                for (i = 0; i < ext->reg_count; i++)
3088                                        amdgpu_ring_write(ring, ext->extent[i]);
3089                        }
3090                }
3091        }
3092
3093        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3094        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3095        switch (adev->asic_type) {
3096        case CHIP_TONGA:
3097                amdgpu_ring_write(ring, 0x16000012);
3098                amdgpu_ring_write(ring, 0x0000002A);
3099                break;
3100        case CHIP_FIJI:
3101                amdgpu_ring_write(ring, 0x3a00161a);
3102                amdgpu_ring_write(ring, 0x0000002e);
3103                break;
3104        case CHIP_TOPAZ:
3105        case CHIP_CARRIZO:
3106                amdgpu_ring_write(ring, 0x00000002);
3107                amdgpu_ring_write(ring, 0x00000000);
3108                break;
3109        case CHIP_STONEY:
3110                amdgpu_ring_write(ring, 0x00000000);
3111                amdgpu_ring_write(ring, 0x00000000);
3112                break;
3113        default:
3114                BUG();
3115        }
3116
3117        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3118        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3119
3120        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3121        amdgpu_ring_write(ring, 0);
3122
3123        /* init the CE partitions */
3124        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3125        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3126        amdgpu_ring_write(ring, 0x8000);
3127        amdgpu_ring_write(ring, 0x8000);
3128
3129        amdgpu_ring_unlock_commit(ring);
3130
3131        return 0;
3132}
3133
3134static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3135{
3136        struct amdgpu_ring *ring;
3137        u32 tmp;
3138        u32 rb_bufsz;
3139        u64 rb_addr, rptr_addr;
3140        int r;
3141
3142        /* Set the write pointer delay */
3143        WREG32(mmCP_RB_WPTR_DELAY, 0);
3144
3145        /* set the RB to use vmid 0 */
3146        WREG32(mmCP_RB_VMID, 0);
3147
3148        /* Set ring buffer size */
3149        ring = &adev->gfx.gfx_ring[0];
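        /* RB_BUFSZ/RB_BLKSZ are log2 encoded; order_base_2() is a round-up log2 */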
3150        rb_bufsz = order_base_2(ring->ring_size / 8);
3151        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3152        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3153        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3154        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3155#ifdef __BIG_ENDIAN
3156        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3157#endif
3158        WREG32(mmCP_RB0_CNTL, tmp);
3159
3160        /* Initialize the ring buffer's read and write pointers */
3161        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3162        ring->wptr = 0;
3163        WREG32(mmCP_RB0_WPTR, ring->wptr);
3164
3165        /* set the wb address whether it's enabled or not */
3166        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3167        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3168        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3169
3170        mdelay(1);
3171        WREG32(mmCP_RB0_CNTL, tmp);
3172
3173        rb_addr = ring->gpu_addr >> 8;
3174        WREG32(mmCP_RB0_BASE, rb_addr);
3175        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3176
3177        /* no gfx doorbells on iceland */
3178        if (adev->asic_type != CHIP_TOPAZ) {
3179                tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3180                if (ring->use_doorbell) {
3181                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3182                                            DOORBELL_OFFSET, ring->doorbell_index);
3183                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3184                                            DOORBELL_EN, 1);
3185                } else {
3186                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3187                                            DOORBELL_EN, 0);
3188                }
3189                WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3190
3191                if (adev->asic_type == CHIP_TONGA) {
3192                        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3193                                            DOORBELL_RANGE_LOWER,
3194                                            AMDGPU_DOORBELL_GFX_RING0);
3195                        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3196
3197                        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3198                               CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3199                }
3200
3201        }
3202
3203        /* start the ring */
3204        gfx_v8_0_cp_gfx_start(adev);
3205        ring->ready = true;
3206        r = amdgpu_ring_test_ring(ring);
3207        if (r) {
3208                ring->ready = false;
3209                return r;
3210        }
3211
3212        return 0;
3213}
3214
3215static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3216{
3217        int i;
3218
3219        if (enable) {
3220                WREG32(mmCP_MEC_CNTL, 0);
3221        } else {
3222                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3223                for (i = 0; i < adev->gfx.num_compute_rings; i++)
3224                        adev->gfx.compute_ring[i].ready = false;
3225        }
3226        udelay(50);
3227}
3228
3229static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
3230{
3231        gfx_v8_0_cp_compute_enable(adev, true);
3232
3233        return 0;
3234}
3235
3236static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3237{
3238        const struct gfx_firmware_header_v1_0 *mec_hdr;
3239        const __le32 *fw_data;
3240        unsigned i, fw_size;
3241
3242        if (!adev->gfx.mec_fw)
3243                return -EINVAL;
3244
3245        gfx_v8_0_cp_compute_enable(adev, false);
3246
3247        mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3248        amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3249
3250        fw_data = (const __le32 *)
3251                (adev->gfx.mec_fw->data +
3252                 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3253        fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3254
3255        /* MEC1 */
3256        WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3257        for (i = 0; i < fw_size; i++)
3258                WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3259        WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3260
3261        /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3262        if (adev->gfx.mec2_fw) {
3263                const struct gfx_firmware_header_v1_0 *mec2_hdr;
3264
3265                mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3266                amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3267
3268                fw_data = (const __le32 *)
3269                        (adev->gfx.mec2_fw->data +
3270                         le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3271                fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3272
3273                WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3274                for (i = 0; i < fw_size; i++)
3275                        WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3276                WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3277        }
3278
3279        return 0;
3280}
3281
3282struct vi_mqd {
3283        uint32_t header;  /* ordinal0 */
3284        uint32_t compute_dispatch_initiator;  /* ordinal1 */
3285        uint32_t compute_dim_x;  /* ordinal2 */
3286        uint32_t compute_dim_y;  /* ordinal3 */
3287        uint32_t compute_dim_z;  /* ordinal4 */
3288        uint32_t compute_start_x;  /* ordinal5 */
3289        uint32_t compute_start_y;  /* ordinal6 */
3290        uint32_t compute_start_z;  /* ordinal7 */
3291        uint32_t compute_num_thread_x;  /* ordinal8 */
3292        uint32_t compute_num_thread_y;  /* ordinal9 */
3293        uint32_t compute_num_thread_z;  /* ordinal10 */
3294        uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3295        uint32_t compute_perfcount_enable;  /* ordinal12 */
3296        uint32_t compute_pgm_lo;  /* ordinal13 */
3297        uint32_t compute_pgm_hi;  /* ordinal14 */
3298        uint32_t compute_tba_lo;  /* ordinal15 */
3299        uint32_t compute_tba_hi;  /* ordinal16 */
3300        uint32_t compute_tma_lo;  /* ordinal17 */
3301        uint32_t compute_tma_hi;  /* ordinal18 */
3302        uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3303        uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3304        uint32_t compute_vmid;  /* ordinal21 */
3305        uint32_t compute_resource_limits;  /* ordinal22 */
3306        uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3307        uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3308        uint32_t compute_tmpring_size;  /* ordinal25 */
3309        uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3310        uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3311        uint32_t compute_restart_x;  /* ordinal28 */
3312        uint32_t compute_restart_y;  /* ordinal29 */
3313        uint32_t compute_restart_z;  /* ordinal30 */
3314        uint32_t compute_thread_trace_enable;  /* ordinal31 */
3315        uint32_t compute_misc_reserved;  /* ordinal32 */
3316        uint32_t compute_dispatch_id;  /* ordinal33 */
3317        uint32_t compute_threadgroup_id;  /* ordinal34 */
3318        uint32_t compute_relaunch;  /* ordinal35 */
3319        uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3320        uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3321        uint32_t compute_wave_restore_control;  /* ordinal38 */
3322        uint32_t reserved9;  /* ordinal39 */
3323        uint32_t reserved10;  /* ordinal40 */
3324        uint32_t reserved11;  /* ordinal41 */
3325        uint32_t reserved12;  /* ordinal42 */
3326        uint32_t reserved13;  /* ordinal43 */
3327        uint32_t reserved14;  /* ordinal44 */
3328        uint32_t reserved15;  /* ordinal45 */
3329        uint32_t reserved16;  /* ordinal46 */
3330        uint32_t reserved17;  /* ordinal47 */
3331        uint32_t reserved18;  /* ordinal48 */
3332        uint32_t reserved19;  /* ordinal49 */
3333        uint32_t reserved20;  /* ordinal50 */
3334        uint32_t reserved21;  /* ordinal51 */
3335        uint32_t reserved22;  /* ordinal52 */
3336        uint32_t reserved23;  /* ordinal53 */
3337        uint32_t reserved24;  /* ordinal54 */
3338        uint32_t reserved25;  /* ordinal55 */
3339        uint32_t reserved26;  /* ordinal56 */
3340        uint32_t reserved27;  /* ordinal57 */
3341        uint32_t reserved28;  /* ordinal58 */
3342        uint32_t reserved29;  /* ordinal59 */
3343        uint32_t reserved30;  /* ordinal60 */
3344        uint32_t reserved31;  /* ordinal61 */
3345        uint32_t reserved32;  /* ordinal62 */
3346        uint32_t reserved33;  /* ordinal63 */
3347        uint32_t reserved34;  /* ordinal64 */
3348        uint32_t compute_user_data_0;  /* ordinal65 */
3349        uint32_t compute_user_data_1;  /* ordinal66 */
3350        uint32_t compute_user_data_2;  /* ordinal67 */
3351        uint32_t compute_user_data_3;  /* ordinal68 */
3352        uint32_t compute_user_data_4;  /* ordinal69 */
3353        uint32_t compute_user_data_5;  /* ordinal70 */
3354        uint32_t compute_user_data_6;  /* ordinal71 */
3355        uint32_t compute_user_data_7;  /* ordinal72 */
3356        uint32_t compute_user_data_8;  /* ordinal73 */
3357        uint32_t compute_user_data_9;  /* ordinal74 */
3358        uint32_t compute_user_data_10;  /* ordinal75 */
3359        uint32_t compute_user_data_11;  /* ordinal76 */
3360        uint32_t compute_user_data_12;  /* ordinal77 */
3361        uint32_t compute_user_data_13;  /* ordinal78 */
3362        uint32_t compute_user_data_14;  /* ordinal79 */
3363        uint32_t compute_user_data_15;  /* ordinal80 */
3364        uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3365        uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3366        uint32_t reserved35;  /* ordinal83 */
3367        uint32_t reserved36;  /* ordinal84 */
3368        uint32_t reserved37;  /* ordinal85 */
3369        uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3370        uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3371        uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3372        uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3373        uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3374        uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3375        uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3376        uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3377        uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3378        uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3379        uint32_t reserved38;  /* ordinal96 */
3380        uint32_t reserved39;  /* ordinal97 */
3381        uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3382        uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3383        uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3384        uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3385        uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3386        uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3387        uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3388        uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3389        uint32_t reserved40;  /* ordinal106 */
3390        uint32_t reserved41;  /* ordinal107 */
3391        uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3392        uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3393        uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3394        uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3395        uint32_t reserved42;  /* ordinal112 */
3396        uint32_t reserved43;  /* ordinal113 */
3397        uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3398        uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3399        uint32_t cp_packet_id_lo;  /* ordinal116 */
3400        uint32_t cp_packet_id_hi;  /* ordinal117 */
3401        uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3402        uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3403        uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3404        uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3405        uint32_t gds_save_mask_lo;  /* ordinal122 */
3406        uint32_t gds_save_mask_hi;  /* ordinal123 */
3407        uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3408        uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3409        uint32_t reserved44;  /* ordinal126 */
3410        uint32_t reserved45;  /* ordinal127 */
3411        uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3412        uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3413        uint32_t cp_hqd_active;  /* ordinal130 */
3414        uint32_t cp_hqd_vmid;  /* ordinal131 */
3415        uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3416        uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3417        uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3418        uint32_t cp_hqd_quantum;  /* ordinal135 */
3419        uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3420        uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3421        uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3422        uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3423        uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3424        uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3425        uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3426        uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3427        uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3428        uint32_t cp_hqd_pq_control;  /* ordinal145 */
3429        uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3430        uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3431        uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3432        uint32_t cp_hqd_ib_control;  /* ordinal149 */
3433        uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3434        uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3435        uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3436        uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3437        uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3438        uint32_t cp_hqd_msg_type;  /* ordinal155 */
3439        uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3440        uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3441        uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3442        uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3443        uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3444        uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3445        uint32_t cp_mqd_control;  /* ordinal162 */
3446        uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3447        uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3448        uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3449        uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3450        uint32_t cp_hqd_eop_control;  /* ordinal167 */
3451        uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3452        uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3453        uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3454        uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3455        uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3456        uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3457        uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3458        uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3459        uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3460        uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3461        uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3462        uint32_t cp_hqd_error;  /* ordinal179 */
3463        uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3464        uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3465        uint32_t reserved46;  /* ordinal182 */
3466        uint32_t reserved47;  /* ordinal183 */
3467        uint32_t reserved48;  /* ordinal184 */
3468        uint32_t reserved49;  /* ordinal185 */
3469        uint32_t reserved50;  /* ordinal186 */
3470        uint32_t reserved51;  /* ordinal187 */
3471        uint32_t reserved52;  /* ordinal188 */
3472        uint32_t reserved53;  /* ordinal189 */
3473        uint32_t reserved54;  /* ordinal190 */
3474        uint32_t reserved55;  /* ordinal191 */
3475        uint32_t iqtimer_pkt_header;  /* ordinal192 */
3476        uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3477        uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3478        uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3479        uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3480        uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3481        uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3482        uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3483        uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3484        uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3485        uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3486        uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3487        uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3488        uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3489        uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3490        uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3491        uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3492        uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3493        uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3494        uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3495        uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3496        uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3497        uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3498        uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3499        uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3500        uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3501        uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3502        uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3503        uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3504        uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3505        uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3506        uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3507        uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3508        uint32_t reserved56;  /* ordinal225 */
3509        uint32_t reserved57;  /* ordinal226 */
3510        uint32_t reserved58;  /* ordinal227 */
3511        uint32_t set_resources_header;  /* ordinal228 */
3512        uint32_t set_resources_dw1;  /* ordinal229 */
3513        uint32_t set_resources_dw2;  /* ordinal230 */
3514        uint32_t set_resources_dw3;  /* ordinal231 */
3515        uint32_t set_resources_dw4;  /* ordinal232 */
3516        uint32_t set_resources_dw5;  /* ordinal233 */
3517        uint32_t set_resources_dw6;  /* ordinal234 */
3518        uint32_t set_resources_dw7;  /* ordinal235 */
3519        uint32_t reserved59;  /* ordinal236 */
3520        uint32_t reserved60;  /* ordinal237 */
3521        uint32_t reserved61;  /* ordinal238 */
3522        uint32_t reserved62;  /* ordinal239 */
3523        uint32_t reserved63;  /* ordinal240 */
3524        uint32_t reserved64;  /* ordinal241 */
3525        uint32_t reserved65;  /* ordinal242 */
3526        uint32_t reserved66;  /* ordinal243 */
3527        uint32_t reserved67;  /* ordinal244 */
3528        uint32_t reserved68;  /* ordinal245 */
3529        uint32_t reserved69;  /* ordinal246 */
3530        uint32_t reserved70;  /* ordinal247 */
3531        uint32_t reserved71;  /* ordinal248 */
3532        uint32_t reserved72;  /* ordinal249 */
3533        uint32_t reserved73;  /* ordinal250 */
3534        uint32_t reserved74;  /* ordinal251 */
3535        uint32_t reserved75;  /* ordinal252 */
3536        uint32_t reserved76;  /* ordinal253 */
3537        uint32_t reserved77;  /* ordinal254 */
3538        uint32_t reserved78;  /* ordinal255 */
3539
3540        uint32_t reserved_t[256]; /* reserved 256-dword buffer used by ucode */
3541};
3542
3543static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3544{
3545        int i, r;
3546
3547        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3548                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3549
3550                if (ring->mqd_obj) {
3551                        r = amdgpu_bo_reserve(ring->mqd_obj, false);
3552                        if (unlikely(r != 0))
3553                                dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3554
3555                        amdgpu_bo_unpin(ring->mqd_obj);
3556                        amdgpu_bo_unreserve(ring->mqd_obj);
3557
3558                        amdgpu_bo_unref(&ring->mqd_obj);
3559                        ring->mqd_obj = NULL;
3560                }
3561        }
3562}
3563
3564static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3565{
3566        int r, i, j;
3567        u32 tmp;
3568        bool use_doorbell = true;
3569        u64 hqd_gpu_addr;
3570        u64 mqd_gpu_addr;
3571        u64 eop_gpu_addr;
3572        u64 wb_gpu_addr;
3573        u32 *buf;
3574        struct vi_mqd *mqd;
3575
3576        /* init the pipes */
3577        mutex_lock(&adev->srbm_mutex);
3578        for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
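                    /* pipes 0-3 belong to MEC1 (me 1), pipes 4-7 to MEC2 (me 2) */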
3579                int me = (i < 4) ? 1 : 2;
3580                int pipe = (i < 4) ? i : (i - 4);
3581
3582                eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3583                eop_gpu_addr >>= 8;
3584
3585                vi_srbm_select(adev, me, pipe, 0, 0);
3586
3587                /* write the EOP addr */
3588                WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3589                WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3590
3591                /* set the VMID assigned */
3592                WREG32(mmCP_HQD_VMID, 0);
3593
3594                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3595                tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3596                tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3597                                    (order_base_2(MEC_HPD_SIZE / 4) - 1));
3598                WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3599        }
3600        vi_srbm_select(adev, 0, 0, 0, 0);
3601        mutex_unlock(&adev->srbm_mutex);
3602
3603        /* init the compute queues */
3604        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3605                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3606
3607                if (ring->mqd_obj == NULL) {
3608                        r = amdgpu_bo_create(adev,
3609                                             sizeof(struct vi_mqd),
3610                                             PAGE_SIZE, true,
3611                                             AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3612                                             NULL, &ring->mqd_obj);
3613                        if (r) {
3614                                dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3615                                return r;
3616                        }
3617                }
3618
3619                r = amdgpu_bo_reserve(ring->mqd_obj, false);
3620                if (unlikely(r != 0)) {
3621                        gfx_v8_0_cp_compute_fini(adev);
3622                        return r;
3623                }
3624                r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3625                                  &mqd_gpu_addr);
3626                if (r) {
3627                        dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3628                        gfx_v8_0_cp_compute_fini(adev);
3629                        return r;
3630                }
3631                r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3632                if (r) {
3633                        dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3634                        gfx_v8_0_cp_compute_fini(adev);
3635                        return r;
3636                }
3637
3638                /* init the mqd struct */
3639                memset(buf, 0, sizeof(struct vi_mqd));
3640
3641                mqd = (struct vi_mqd *)buf;
3642                mqd->header = 0xC0310800;
3643                mqd->compute_pipelinestat_enable = 0x00000001;
3644                mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3645                mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3646                mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3647                mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3648                mqd->compute_misc_reserved = 0x00000003;
3649
3650                mutex_lock(&adev->srbm_mutex);
3651                vi_srbm_select(adev, ring->me,
3652                               ring->pipe,
3653                               ring->queue, 0);
3654
3655                /* disable wptr polling */
3656                tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3657                tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3658                WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3659
3660                mqd->cp_hqd_eop_base_addr_lo =
3661                        RREG32(mmCP_HQD_EOP_BASE_ADDR);
3662                mqd->cp_hqd_eop_base_addr_hi =
3663                        RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3664
3665                /* enable or disable the doorbell */
3666                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3667                if (use_doorbell) {
3668                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3669                } else {
3670                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3671                }
3672                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3673                mqd->cp_hqd_pq_doorbell_control = tmp;
3674
3675                /* disable the queue if it's active */
3676                mqd->cp_hqd_dequeue_request = 0;
3677                mqd->cp_hqd_pq_rptr = 0;
3678                mqd->cp_hqd_pq_wptr = 0;
3679                if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3680                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3681                        for (j = 0; j < adev->usec_timeout; j++) {
3682                                if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3683                                        break;
3684                                udelay(1);
3685                        }
3686                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3687                        WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3688                        WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3689                }
3690
3691                /* set the pointer to the MQD */
3692                mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3693                mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3694                WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3695                WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3696
3697                /* set MQD vmid to 0 */
3698                tmp = RREG32(mmCP_MQD_CONTROL);
3699                tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3700                WREG32(mmCP_MQD_CONTROL, tmp);
3701                mqd->cp_mqd_control = tmp;
3702
3703                /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3704                hqd_gpu_addr = ring->gpu_addr >> 8;
3705                mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3706                mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3707                WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3708                WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3709
3710                /* set up the HQD, this is similar to CP_RB0_CNTL */
3711                tmp = RREG32(mmCP_HQD_PQ_CONTROL);
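                    /* QUEUE_SIZE is log2 of the ring size in dwords, minus 1 */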
3712                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3713                                    (order_base_2(ring->ring_size / 4) - 1));
3714                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3715                               ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3716#ifdef __BIG_ENDIAN
3717                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3718#endif
3719                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3720                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3721                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3722                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3723                WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3724                mqd->cp_hqd_pq_control = tmp;
3725
3726                /* set the wb address whether it's enabled or not */
3727                wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3728                mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3729                mqd->cp_hqd_pq_rptr_report_addr_hi =
3730                        upper_32_bits(wb_gpu_addr) & 0xffff;
3731                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3732                       mqd->cp_hqd_pq_rptr_report_addr_lo);
3733                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3734                       mqd->cp_hqd_pq_rptr_report_addr_hi);
3735
3736                /* only used if wptr polling is enabled (CP_PQ_WPTR_POLL_CNTL.EN = 1) */
3737                wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3738                mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3739                mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3740                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3741                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3742                       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3743
3744                /* enable the doorbell if requested */
3745                if (use_doorbell) {
3746                        if ((adev->asic_type == CHIP_CARRIZO) ||
3747                            (adev->asic_type == CHIP_FIJI) ||
3748                            (adev->asic_type == CHIP_STONEY)) {
3749                                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3750                                       AMDGPU_DOORBELL_KIQ << 2);
3751                                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3752                                       AMDGPU_DOORBELL_MEC_RING7 << 2);
3753                        }
3754                        tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3755                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3756                                            DOORBELL_OFFSET, ring->doorbell_index);
3757                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3758                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3759                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3760                        mqd->cp_hqd_pq_doorbell_control = tmp;
3761
3762                } else {
3763                        mqd->cp_hqd_pq_doorbell_control = 0;
3764                }
3765                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3766                       mqd->cp_hqd_pq_doorbell_control);
3767
3768                /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3769                ring->wptr = 0;
3770                mqd->cp_hqd_pq_wptr = ring->wptr;
3771                WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3772                mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3773
3774                /* set the vmid for the queue */
3775                mqd->cp_hqd_vmid = 0;
3776                WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3777
3778                tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3779                tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3780                WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3781                mqd->cp_hqd_persistent_state = tmp;
3782                if (adev->asic_type == CHIP_STONEY) {
3783                        tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3784                        tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3785                        WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3786                }
3787
3788                /* activate the queue */
3789                mqd->cp_hqd_active = 1;
3790                WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3791
3792                vi_srbm_select(adev, 0, 0, 0, 0);
3793                mutex_unlock(&adev->srbm_mutex);
3794
3795                amdgpu_bo_kunmap(ring->mqd_obj);
3796                amdgpu_bo_unreserve(ring->mqd_obj);
3797        }
3798
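            /* all queues are set up; enable doorbell handling in the CP */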
3799        if (use_doorbell) {
3800                tmp = RREG32(mmCP_PQ_STATUS);
3801                tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3802                WREG32(mmCP_PQ_STATUS, tmp);
3803        }
3804
3805        r = gfx_v8_0_cp_compute_start(adev);
3806        if (r)
3807                return r;
3808
3809        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3810                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3811
3812                ring->ready = true;
3813                r = amdgpu_ring_test_ring(ring);
3814                if (r)
3815                        ring->ready = false;
3816        }
3817
3818        return 0;
3819}
3820
3821static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3822{
3823        int r;
3824
3825        if (!(adev->flags & AMD_IS_APU))
3826                gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3827
3828        if (!adev->pp_enabled) {
3829                if (!adev->firmware.smu_load) {
3830                        /* legacy firmware loading */
3831                        r = gfx_v8_0_cp_gfx_load_microcode(adev);
3832                        if (r)
3833                                return r;
3834
3835                        r = gfx_v8_0_cp_compute_load_microcode(adev);
3836                        if (r)
3837                                return r;
3838                } else {
3839                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3840                                                        AMDGPU_UCODE_ID_CP_CE);
3841                        if (r)
3842                                return -EINVAL;
3843
3844                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3845                                                        AMDGPU_UCODE_ID_CP_PFP);
3846                        if (r)
3847                                return -EINVAL;
3848
3849                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3850                                                        AMDGPU_UCODE_ID_CP_ME);
3851                        if (r)
3852                                return -EINVAL;
3853
3854                        if (adev->asic_type == CHIP_TOPAZ) {
3855                                r = gfx_v8_0_cp_compute_load_microcode(adev);
3856                                if (r)
3857                                        return r;
3858                        } else {
3859                                r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3860                                                                                 AMDGPU_UCODE_ID_CP_MEC1);
3861                                if (r)
3862                                        return -EINVAL;
3863                        }
3864                }
3865        }
3866
3867        r = gfx_v8_0_cp_gfx_resume(adev);
3868        if (r)
3869                return r;
3870
3871        r = gfx_v8_0_cp_compute_resume(adev);
3872        if (r)
3873                return r;
3874
3875        gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3876
3877        return 0;
3878}
3879
3880static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3881{
3882        gfx_v8_0_cp_gfx_enable(adev, enable);
3883        gfx_v8_0_cp_compute_enable(adev, enable);
3884}
3885
3886static int gfx_v8_0_hw_init(void *handle)
3887{
3888        int r;
3889        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3890
3891        gfx_v8_0_init_golden_registers(adev);
3892
3893        gfx_v8_0_gpu_init(adev);
3894
3895        r = gfx_v8_0_rlc_resume(adev);
3896        if (r)
3897                return r;
3898
3899        r = gfx_v8_0_cp_resume(adev);
3900        if (r)
3901                return r;
3902
3903        return r;
3904}
3905
3906static int gfx_v8_0_hw_fini(void *handle)
3907{
3908        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3909
3910        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3911        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3912        gfx_v8_0_cp_enable(adev, false);
3913        gfx_v8_0_rlc_stop(adev);
3914        gfx_v8_0_cp_compute_fini(adev);
3915
3916        return 0;
3917}
3918
3919static int gfx_v8_0_suspend(void *handle)
3920{
3921        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3922
3923        return gfx_v8_0_hw_fini(adev);
3924}
3925
3926static int gfx_v8_0_resume(void *handle)
3927{
3928        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3929
3930        return gfx_v8_0_hw_init(adev);
3931}
3932
3933static bool gfx_v8_0_is_idle(void *handle)
3934{
3935        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3936
3937        if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3938                return false;
3939        else
3940                return true;
3941}
3942
3943static int gfx_v8_0_wait_for_idle(void *handle)
3944{
3945        unsigned i;
3946        u32 tmp;
3947        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3948
3949        for (i = 0; i < adev->usec_timeout; i++) {
3950                /* read GRBM_STATUS */
3951                tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3952
3953                if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3954                        return 0;
3955                udelay(1);
3956        }
3957        return -ETIMEDOUT;
3958}
3959
3960static void gfx_v8_0_print_status(void *handle)
3961{
3962        int i;
3963        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3964
3965        dev_info(adev->dev, "GFX 8.x registers\n");
3966        dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3967                 RREG32(mmGRBM_STATUS));
3968        dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3969                 RREG32(mmGRBM_STATUS2));
3970        dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3971                 RREG32(mmGRBM_STATUS_SE0));
3972        dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3973                 RREG32(mmGRBM_STATUS_SE1));
3974        dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3975                 RREG32(mmGRBM_STATUS_SE2));
3976        dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3977                 RREG32(mmGRBM_STATUS_SE3));
3978        dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3979        dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3980                 RREG32(mmCP_STALLED_STAT1));
3981        dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3982                 RREG32(mmCP_STALLED_STAT2));
3983        dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3984                 RREG32(mmCP_STALLED_STAT3));
3985        dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3986                 RREG32(mmCP_CPF_BUSY_STAT));
3987        dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3988                 RREG32(mmCP_CPF_STALLED_STAT1));
3989        dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3990        dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3991        dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3992                 RREG32(mmCP_CPC_STALLED_STAT1));
3993        dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3994
3995        for (i = 0; i < 32; i++) {
3996                dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3997                         i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3998        }
3999        for (i = 0; i < 16; i++) {
4000                dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
4001                         i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
4002        }
4003        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4004                dev_info(adev->dev, "  se: %d\n", i);
4005                gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
4006                dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
4007                         RREG32(mmPA_SC_RASTER_CONFIG));
4008                dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
4009                         RREG32(mmPA_SC_RASTER_CONFIG_1));
4010        }
4011        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4012
4013        dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
4014                 RREG32(mmGB_ADDR_CONFIG));
4015        dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
4016                 RREG32(mmHDP_ADDR_CONFIG));
4017        dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
4018                 RREG32(mmDMIF_ADDR_CALC));
4019        dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
4020                 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
4021        dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
4022                 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
4023        dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
4024                 RREG32(mmUVD_UDEC_ADDR_CONFIG));
4025        dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
4026                 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
4027        dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
4028                 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
4029
4030        dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
4031                 RREG32(mmCP_MEQ_THRESHOLDS));
4032        dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
4033                 RREG32(mmSX_DEBUG_1));
4034        dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
4035                 RREG32(mmTA_CNTL_AUX));
4036        dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
4037                 RREG32(mmSPI_CONFIG_CNTL));
4038        dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
4039                 RREG32(mmSQ_CONFIG));
4040        dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
4041                 RREG32(mmDB_DEBUG));
4042        dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
4043                 RREG32(mmDB_DEBUG2));
4044        dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
4045                 RREG32(mmDB_DEBUG3));
4046        dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
4047                 RREG32(mmCB_HW_CONTROL));
4048        dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
4049                 RREG32(mmSPI_CONFIG_CNTL_1));
4050        dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
4051                 RREG32(mmPA_SC_FIFO_SIZE));
4052        dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
4053                 RREG32(mmVGT_NUM_INSTANCES));
4054        dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
4055                 RREG32(mmCP_PERFMON_CNTL));
4056        dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4057                 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4058        dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
4059                 RREG32(mmVGT_CACHE_INVALIDATION));
4060        dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
4061                 RREG32(mmVGT_GS_VERTEX_REUSE));
4062        dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4063                 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4064        dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
4065                 RREG32(mmPA_CL_ENHANCE));
4066        dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
4067                 RREG32(mmPA_SC_ENHANCE));
4068
4069        dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
4070                 RREG32(mmCP_ME_CNTL));
4071        dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
4072                 RREG32(mmCP_MAX_CONTEXT));
4073        dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
4074                 RREG32(mmCP_ENDIAN_SWAP));
4075        dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
4076                 RREG32(mmCP_DEVICE_ID));
4077
4078        dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4079                 RREG32(mmCP_SEM_WAIT_TIMER));
4080
4081        dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4082                 RREG32(mmCP_RB_WPTR_DELAY));
4083        dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4084                 RREG32(mmCP_RB_VMID));
4085        dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4086                 RREG32(mmCP_RB0_CNTL));
4087        dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4088                 RREG32(mmCP_RB0_WPTR));
4089        dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4090                 RREG32(mmCP_RB0_RPTR_ADDR));
4091        dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4092                 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4093        dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4094                 RREG32(mmCP_RB0_CNTL));
4095        dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4096                 RREG32(mmCP_RB0_BASE));
4097        dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4098                 RREG32(mmCP_RB0_BASE_HI));
4099        dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4100                 RREG32(mmCP_MEC_CNTL));
4101        dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4102                 RREG32(mmCP_CPF_DEBUG));
4103
4104        dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4105                 RREG32(mmSCRATCH_ADDR));
4106        dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4107                 RREG32(mmSCRATCH_UMSK));
4108
4109        dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4110                 RREG32(mmCP_INT_CNTL_RING0));
4111        dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4112                 RREG32(mmRLC_LB_CNTL));
4113        dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4114                 RREG32(mmRLC_CNTL));
4115        dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4116                 RREG32(mmRLC_CGCG_CGLS_CTRL));
4117        dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4118                 RREG32(mmRLC_LB_CNTR_INIT));
4119        dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4120                 RREG32(mmRLC_LB_CNTR_MAX));
4121        dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4122                 RREG32(mmRLC_LB_INIT_CU_MASK));
4123        dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4124                 RREG32(mmRLC_LB_PARAMS));
4125        dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4126                 RREG32(mmRLC_LB_CNTL));
4127        dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4128                 RREG32(mmRLC_MC_CNTL));
4129        dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4130                 RREG32(mmRLC_UCODE_CNTL));
4131
4132        mutex_lock(&adev->srbm_mutex);
4133        for (i = 0; i < 16; i++) {
4134                vi_srbm_select(adev, 0, 0, 0, i);
4135                dev_info(adev->dev, "  VM %d:\n", i);
4136                dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4137                         RREG32(mmSH_MEM_CONFIG));
4138                dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4139                         RREG32(mmSH_MEM_APE1_BASE));
4140                dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4141                         RREG32(mmSH_MEM_APE1_LIMIT));
4142                dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4143                         RREG32(mmSH_MEM_BASES));
4144        }
4145        vi_srbm_select(adev, 0, 0, 0, 0);
4146        mutex_unlock(&adev->srbm_mutex);
4147}
4148
4149static int gfx_v8_0_soft_reset(void *handle)
4150{
4151        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4152        u32 tmp;
4153        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4154
4155        /* GRBM_STATUS */
4156        tmp = RREG32(mmGRBM_STATUS);
4157        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4158                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4159                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4160                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4161                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4162                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4163                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4164                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4165                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4166                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4167        }
4168
4169        if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4170                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4171                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4172                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4173                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4174        }
4175
4176        /* GRBM_STATUS2 */
4177        tmp = RREG32(mmGRBM_STATUS2);
4178        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4179                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4180                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4181
4182        /* SRBM_STATUS */
4183        tmp = RREG32(mmSRBM_STATUS);
4184        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4185                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4186                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4187
4188        if (grbm_soft_reset || srbm_soft_reset) {
4189                gfx_v8_0_print_status((void *)adev);
4190                /* stop the rlc */
4191                gfx_v8_0_rlc_stop(adev);
4192
4193                /* Disable GFX parsing/prefetching */
4194                gfx_v8_0_cp_gfx_enable(adev, false);
4195
4196                /* Disable MEC parsing/prefetching */
4197                gfx_v8_0_cp_compute_enable(adev, false);
4198
4199                if (grbm_soft_reset || srbm_soft_reset) {
4200                        tmp = RREG32(mmGMCON_DEBUG);
4201                        tmp = REG_SET_FIELD(tmp,
4202                                            GMCON_DEBUG, GFX_STALL, 1);
4203                        tmp = REG_SET_FIELD(tmp,
4204                                            GMCON_DEBUG, GFX_CLEAR, 1);
4205                        WREG32(mmGMCON_DEBUG, tmp);
4206
4207                        udelay(50);
4208                }
4209
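                    /* assert the requested resets, let them settle, then release them */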
4210                if (grbm_soft_reset) {
4211                        tmp = RREG32(mmGRBM_SOFT_RESET);
4212                        tmp |= grbm_soft_reset;
4213                        dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4214                        WREG32(mmGRBM_SOFT_RESET, tmp);
4215                        tmp = RREG32(mmGRBM_SOFT_RESET);
4216
4217                        udelay(50);
4218
4219                        tmp &= ~grbm_soft_reset;
4220                        WREG32(mmGRBM_SOFT_RESET, tmp);
4221                        tmp = RREG32(mmGRBM_SOFT_RESET);
4222                }
4223
4224                if (srbm_soft_reset) {
4225                        tmp = RREG32(mmSRBM_SOFT_RESET);
4226                        tmp |= srbm_soft_reset;
4227                        dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4228                        WREG32(mmSRBM_SOFT_RESET, tmp);
4229                        tmp = RREG32(mmSRBM_SOFT_RESET);
4230
4231                        udelay(50);
4232
4233                        tmp &= ~srbm_soft_reset;
4234                        WREG32(mmSRBM_SOFT_RESET, tmp);
4235                        tmp = RREG32(mmSRBM_SOFT_RESET);
4236                }
4237
4238                if (grbm_soft_reset || srbm_soft_reset) {
4239                        tmp = RREG32(mmGMCON_DEBUG);
4240                        tmp = REG_SET_FIELD(tmp,
4241                                            GMCON_DEBUG, GFX_STALL, 0);
4242                        tmp = REG_SET_FIELD(tmp,
4243                                            GMCON_DEBUG, GFX_CLEAR, 0);
4244                        WREG32(mmGMCON_DEBUG, tmp);
4245                }
4246
4247                /* Wait a little for things to settle down */
4248                udelay(50);
4249                gfx_v8_0_print_status((void *)adev);
4250        }
4251        return 0;
4252}
4253
4254/**
4255 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4256 *
4257 * @adev: amdgpu_device pointer
4258 *
4259 * Fetches a GPU clock counter snapshot.
4260 * Returns the 64-bit clock counter snapshot.
4261 */
4262uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4263{
4264        uint64_t clock;
4265
4266        mutex_lock(&adev->gfx.gpu_clock_mutex);
4267        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4268        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4269                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4270        mutex_unlock(&adev->gfx.gpu_clock_mutex);
4271        return clock;
4272}
4273
4274static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4275                                          uint32_t vmid,
4276                                          uint32_t gds_base, uint32_t gds_size,
4277                                          uint32_t gws_base, uint32_t gws_size,
4278                                          uint32_t oa_base, uint32_t oa_size)
4279{
4280        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4281        gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4282
4283        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4284        gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4285
4286        oa_base = oa_base >> AMDGPU_OA_SHIFT;
4287        oa_size = oa_size >> AMDGPU_OA_SHIFT;
4288
4289        /* GDS Base */
4290        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4291        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4292                                WRITE_DATA_DST_SEL(0)));
4293        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4294        amdgpu_ring_write(ring, 0);
4295        amdgpu_ring_write(ring, gds_base);
4296
4297        /* GDS Size */
4298        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4299        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4300                                WRITE_DATA_DST_SEL(0)));
4301        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4302        amdgpu_ring_write(ring, 0);
4303        amdgpu_ring_write(ring, gds_size);
4304
4305        /* GWS */
4306        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4307        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4308                                WRITE_DATA_DST_SEL(0)));
4309        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4310        amdgpu_ring_write(ring, 0);
4311        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4312
4313        /* OA */
4314        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4315        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4316                                WRITE_DATA_DST_SEL(0)));
4317        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4318        amdgpu_ring_write(ring, 0);
4319        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4320}
4321
4322static int gfx_v8_0_early_init(void *handle)
4323{
4324        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4325
4326        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4327        adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4328        gfx_v8_0_set_ring_funcs(adev);
4329        gfx_v8_0_set_irq_funcs(adev);
4330        gfx_v8_0_set_gds_init(adev);
4331
4332        return 0;
4333}
4334
4335static int gfx_v8_0_late_init(void *handle)
4336{
4337        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4338        int r;
4339
4340        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4341        if (r)
4342                return r;
4343
4344        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4345        if (r)
4346                return r;
4347
4348        /* requires IBs so do in late init after IB pool is initialized */
4349        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4350        if (r)
4351                return r;
4352
4353        return 0;
4354}
4355
4356static int gfx_v8_0_set_powergating_state(void *handle,
4357                                          enum amd_powergating_state state)
4358{
4359        return 0;
4360}
4361
4362static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
4363                uint32_t reg_addr, uint32_t cmd)
4364{
4365        uint32_t data;
4366
4367        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4368
4369        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4370        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4371
4372        data = RREG32(mmRLC_SERDES_WR_CTRL);
4373        data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4374                        RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4375                        RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4376                        RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4377                        RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4378                        RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4379                        RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4380                        RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4381                        RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4382                        RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4383                        RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4384        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4385                        (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4386                        (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4387                        (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4388
4389        WREG32(mmRLC_SERDES_WR_CTRL, data);
4390}
4391
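/**
 * fiji_update_medium_grain_clock_gating - toggle MGCG/MGLS on Fiji
 *
 * @adev: amdgpu_device pointer
 * @enable: enable or disable the clock gating feature
 *
 * Enables or disables RLC and CP memory light sleep, the MGCG overrides
 * in RLC_CGTT_MGCG_OVERRIDE and CGTS (tree shade) gating, following the
 * numbered sequence in the comments below.
 */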
4392static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4393                bool enable)
4394{
4395        uint32_t temp, data;
4396
4397        /* It is disabled by HW by default */
4398        if (enable) {
4399                /* 1 - RLC memory Light sleep */
4400                temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4401                data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4402                if (temp != data)
4403                        WREG32(mmRLC_MEM_SLP_CNTL, data);
4404
4405                /* 2 - CP memory Light sleep */
4406                temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4407                data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4408                if (temp != data)
4409                        WREG32(mmCP_MEM_SLP_CNTL, data);
4410
4411                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
4412                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4413                data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4414                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4415                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4416                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4417
4418                if (temp != data)
4419                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4420
4421                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4422                gfx_v8_0_wait_for_rlc_serdes(adev);
4423
4424                /* 5 - clear mgcg override */
4425                fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4426
4427                /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
4428                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4429                data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4430                data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4431                data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4432                data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4433                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4434                data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4435                data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4436                if (temp != data)
4437                        WREG32(mmCGTS_SM_CTRL_REG, data);
4438                udelay(50);
4439
4440                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4441                gfx_v8_0_wait_for_rlc_serdes(adev);
4442        } else {
4443                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4444                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4445                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4446                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4447                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4448                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4449                if (temp != data)
4450                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4451
4452                /* 2 - disable MGLS in RLC */
4453                data = RREG32(mmRLC_MEM_SLP_CNTL);
4454                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4455                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4456                        WREG32(mmRLC_MEM_SLP_CNTL, data);
4457                }
4458
4459                /* 3 - disable MGLS in CP */
4460                data = RREG32(mmCP_MEM_SLP_CNTL);
4461                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4462                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4463                        WREG32(mmCP_MEM_SLP_CNTL, data);
4464                }
4465
4466                /* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
4467                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4468                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4469                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4470                if (temp != data)
4471                        WREG32(mmCGTS_SM_CTRL_REG, data);
4472
4473                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4474                gfx_v8_0_wait_for_rlc_serdes(adev);
4475
4476                /* 6 - set mgcg override */
4477                fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4478
4479                udelay(50);
4480
4481                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4482                gfx_v8_0_wait_for_rlc_serdes(adev);
4483        }
4484}
4485
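/**
 * fiji_update_coarse_grain_clock_gating - toggle CGCG/CGLS on Fiji
 *
 * @adev: amdgpu_device pointer
 * @enable: enable or disable the clock gating feature
 *
 * Programs the CGCG/CGLS overrides and RLC_CGCG_CGLS_CTRL, sequencing
 * the transition with serdes commands and RLC serdes idle waits.
 */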
4486static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4487                bool enable)
4488{
4489        uint32_t temp, temp1, data, data1;
4490
4491        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4492
4493        if (enable) {
4494                /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4495                 * Cmp_busy/GFX_Idle interrupts
4496                 */
4497                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4498
4499                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4500                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4501                if (temp1 != data1)
4502                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4503
4504                /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4505                gfx_v8_0_wait_for_rlc_serdes(adev);
4506
4507                /* 3 - clear cgcg override */
4508                fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4509
4510                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4511                gfx_v8_0_wait_for_rlc_serdes(adev);
4512
4513                /* 4 - write cmd to set CGLS */
4514                fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4515
4516                /* 5 - enable cgcg */
4517                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4518
4519                /* enable cgls*/
4520                data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4521
4522                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4523                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4524
4525                if (temp1 != data1)
4526                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4527
4528                if (temp != data)
4529                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4530        } else {
4531                /* disable cntx_empty_int_enable & GFX Idle interrupt */
4532                gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4533
4534                /* TEST CGCG */
4535                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4536                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4537                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4538                if (temp1 != data1)
4539                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4540
4541                /* read gfx register to wake up cgcg */
4542                RREG32(mmCB_CGTT_SCLK_CTRL);
4543                RREG32(mmCB_CGTT_SCLK_CTRL);
4544                RREG32(mmCB_CGTT_SCLK_CTRL);
4545                RREG32(mmCB_CGTT_SCLK_CTRL);
4546
4547                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4548                gfx_v8_0_wait_for_rlc_serdes(adev);
4549
4550                /* write cmd to Set CGCG Override */
4551                fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4552
4553                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4554                gfx_v8_0_wait_for_rlc_serdes(adev);
4555
4556                /* write cmd to Clear CGLS */
4557                fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4558
4559                /* disable cgcg, cgls should be disabled too. */
4560                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4561                                RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4562                if (temp != data)
4563                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4564        }
4565}
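
/**
 * fiji_update_gfx_clock_gating - update all GFX clock gating features
 *
 * @adev: amdgpu_device pointer
 * @enable: enable or disable clock gating
 *
 * When enabling, MGCG/MGLS/TS(CG/LS) are programmed before CGCG/CGLS;
 * when disabling, the order is reversed.
 */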
4566static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4567                bool enable)
4568{
4569        if (enable) {
4570                /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4571                 * ===  MGCG + MGLS + TS(CG/LS) ===
4572                 */
4573                fiji_update_medium_grain_clock_gating(adev, enable);
4574                fiji_update_coarse_grain_clock_gating(adev, enable);
4575        } else {
4576                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4577                 * ===  CGCG + CGLS ===
4578                 */
4579                fiji_update_coarse_grain_clock_gating(adev, enable);
4580                fiji_update_medium_grain_clock_gating(adev, enable);
4581        }
4582        return 0;
4583}
4584
4585static int gfx_v8_0_set_clockgating_state(void *handle,
4586                                          enum amd_clockgating_state state)
4587{
4588        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4589
4590        switch (adev->asic_type) {
4591        case CHIP_FIJI:
4592                fiji_update_gfx_clock_gating(adev,
4593                                state == AMD_CG_STATE_GATE);
4594                break;
4595        default:
4596                break;
4597        }
4598        return 0;
4599}
4600
4601static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4602{
4603        u32 rptr;
4604
4605        rptr = ring->adev->wb.wb[ring->rptr_offs];
4606
4607        return rptr;
4608}
4609
4610static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4611{
4612        struct amdgpu_device *adev = ring->adev;
4613        u32 wptr;
4614
4615        if (ring->use_doorbell)
4616                /* XXX check if swapping is necessary on BE */
4617                wptr = ring->adev->wb.wb[ring->wptr_offs];
4618        else
4619                wptr = RREG32(mmCP_RB0_WPTR);
4620
4621        return wptr;
4622}
4623
4624static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4625{
4626        struct amdgpu_device *adev = ring->adev;
4627
4628        if (ring->use_doorbell) {
4629                /* XXX check if swapping is necessary on BE */
4630                adev->wb.wb[ring->wptr_offs] = ring->wptr;
4631                WDOORBELL32(ring->doorbell_index, ring->wptr);
4632        } else {
4633                WREG32(mmCP_RB0_WPTR, ring->wptr);
4634                (void)RREG32(mmCP_RB0_WPTR);
4635        }
4636}
4637
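/**
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the CP ring
 *
 * @ring: amdgpu ring buffer object
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE for the bit belonging to this ring's CP engine.
 */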
4638static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4639{
4640        u32 ref_and_mask, reg_mem_engine;
4641
4642        if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4643                switch (ring->me) {
4644                case 1:
4645                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4646                        break;
4647                case 2:
4648                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4649                        break;
4650                default:
4651                        return;
4652                }
4653                reg_mem_engine = 0;
4654        } else {
4655                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4656                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4657        }
4658
4659        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4660        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4661                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
4662                                 reg_mem_engine));
4663        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4664        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4665        amdgpu_ring_write(ring, ref_and_mask);
4666        amdgpu_ring_write(ring, ref_and_mask);
4667        amdgpu_ring_write(ring, 0x20); /* poll interval */
4668}
4669
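/**
 * gfx_v8_0_ring_emit_ib_gfx - emit an IB on the gfx ring
 *
 * @ring: amdgpu ring buffer object
 * @ib: indirect buffer to schedule
 *
 * Updates the next_rptr writeback, emits a SWITCH_BUFFER on a context
 * switch and schedules the IB with an INDIRECT_BUFFER(_CONST) packet.
 */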
4670static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4671                                  struct amdgpu_ib *ib)
4672{
4673        bool need_ctx_switch = ring->current_ctx != ib->ctx;
4674        u32 header, control = 0;
4675        u32 next_rptr = ring->wptr + 5;
4676
4677        /* drop the CE preamble IB for the same context */
4678        if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4679                return;
4680
4681        if (need_ctx_switch)
4682                next_rptr += 2;
4683
4684        next_rptr += 4;
4685        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4686        amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4687        amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4688        amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4689        amdgpu_ring_write(ring, next_rptr);
4690
4691        /* insert SWITCH_BUFFER packet before first IB in the ring frame */
4692        if (need_ctx_switch) {
4693                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4694                amdgpu_ring_write(ring, 0);
4695        }
4696
4697        if (ib->flags & AMDGPU_IB_FLAG_CE)
4698                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4699        else
4700                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4701
4702        control |= ib->length_dw |
4703                (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4704
4705        amdgpu_ring_write(ring, header);
4706        amdgpu_ring_write(ring,
4707#ifdef __BIG_ENDIAN
4708                          (2 << 0) |
4709#endif
4710                          (ib->gpu_addr & 0xFFFFFFFC));
4711        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4712        amdgpu_ring_write(ring, control);
4713}
4714
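/**
 * gfx_v8_0_ring_emit_ib_compute - emit an IB on a compute ring
 *
 * @ring: amdgpu ring buffer object
 * @ib: indirect buffer to schedule
 *
 * Updates the next_rptr writeback and schedules the IB with an
 * INDIRECT_BUFFER packet tagged with the VM id, if any.
 */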
4715static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4716                                  struct amdgpu_ib *ib)
4717{
4718        u32 header, control = 0;
4719        u32 next_rptr = ring->wptr + 5;
4720
4721        control |= INDIRECT_BUFFER_VALID;
4722
4723        next_rptr += 4;
4724        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4725        amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4726        amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4727        amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4728        amdgpu_ring_write(ring, next_rptr);
4729
4730        header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4731
4732        control |= ib->length_dw |
4733                           (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4734
4735        amdgpu_ring_write(ring, header);
4736        amdgpu_ring_write(ring,
4737#ifdef __BIG_ENDIAN
4738                                          (2 << 0) |
4739#endif
4740                                          (ib->gpu_addr & 0xFFFFFFFC));
4741        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4742        amdgpu_ring_write(ring, control);
4743}
4744
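/**
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * @ring: amdgpu ring buffer object
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_* bits selecting 64 bit writes and interrupts
 *
 * Emits an EVENT_WRITE_EOP packet that flushes the caches, writes the
 * sequence number and optionally raises an interrupt.
 */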
4745static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4746                                         u64 seq, unsigned flags)
4747{
4748        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4749        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4750
4751        /* EVENT_WRITE_EOP - flush caches, send int */
4752        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4753        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4754                                 EOP_TC_ACTION_EN |
4755                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4756                                 EVENT_INDEX(5)));
4757        amdgpu_ring_write(ring, addr & 0xfffffffc);
4758        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4759                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4760        amdgpu_ring_write(ring, lower_32_bits(seq));
4761        amdgpu_ring_write(ring, upper_32_bits(seq));
4763}
4764
4765/**
4766 * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
4767 *
4768 * @ring: amdgpu ring buffer object
4769 * @semaphore: amdgpu semaphore object
4770 * @emit_wait: Is this a semaphore wait?
4771 *
4772 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4773 * from running ahead of semaphore waits.
4774 */
4775static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
4776                                         struct amdgpu_semaphore *semaphore,
4777                                         bool emit_wait)
4778{
4779        uint64_t addr = semaphore->gpu_addr;
4780        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4781
4782        if (ring->adev->asic_type == CHIP_TOPAZ ||
4783            ring->adev->asic_type == CHIP_TONGA ||
4784            ring->adev->asic_type == CHIP_FIJI)
4785            /* these VI parts have a hw semaphore bug, return false to switch back to sw fence wait */
4786                return false;
4787        else {
4788                amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
4789                amdgpu_ring_write(ring, lower_32_bits(addr));
4790                amdgpu_ring_write(ring, upper_32_bits(addr));
4791                amdgpu_ring_write(ring, sel);
4792        }
4793
4794        if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
4795                /* Prevent the PFP from running ahead of the semaphore wait */
4796                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4797                amdgpu_ring_write(ring, 0x0);
4798        }
4799
4800        return true;
4801}
4802
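/**
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VM context on the CP ring
 *
 * @ring: amdgpu ring buffer object
 * @vm_id: VM context id to flush
 * @pd_addr: new page directory base address
 *
 * Waits for the ring's pending fences, updates the page table base for
 * the VM context, requests a TLB invalidate and waits for it to finish.
 */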
4803static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4804                                        unsigned vm_id, uint64_t pd_addr)
4805{
4806        int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4807        uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
4808        uint64_t addr = ring->fence_drv.gpu_addr;
4809
4810        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4811        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4812                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
4813                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
4814        amdgpu_ring_write(ring, addr & 0xfffffffc);
4815        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4816        amdgpu_ring_write(ring, seq);
4817        amdgpu_ring_write(ring, 0xffffffff);
4818        amdgpu_ring_write(ring, 4); /* poll interval */
4819
4820        if (usepfp) {
4821                /* sync CE with ME to prevent the CE from fetching the CEIB before the context switch is done */
4822                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4823                amdgpu_ring_write(ring, 0);
4824                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4825                amdgpu_ring_write(ring, 0);
4826        }
4827
4828        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4829        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4830                                 WRITE_DATA_DST_SEL(0)) |
4831                                 WR_CONFIRM);
4832        if (vm_id < 8) {
4833                amdgpu_ring_write(ring,
4834                                  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4835        } else {
4836                amdgpu_ring_write(ring,
4837                                  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4838        }
4839        amdgpu_ring_write(ring, 0);
4840        amdgpu_ring_write(ring, pd_addr >> 12);
4841
4842        /* bits 0-15 are the VM contexts 0-15 */
4843        /* invalidate the cache */
4844        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4845        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4846                                 WRITE_DATA_DST_SEL(0)));
4847        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4848        amdgpu_ring_write(ring, 0);
4849        amdgpu_ring_write(ring, 1 << vm_id);
4850
4851        /* wait for the invalidate to complete */
4852        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4853        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4854                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
4855                                 WAIT_REG_MEM_ENGINE(0))); /* me */
4856        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4857        amdgpu_ring_write(ring, 0);
4858        amdgpu_ring_write(ring, 0); /* ref */
4859        amdgpu_ring_write(ring, 0); /* mask */
4860        amdgpu_ring_write(ring, 0x20); /* poll interval */
4861
4862        /* compute doesn't have PFP */
4863        if (usepfp) {
4864                /* sync PFP to ME, otherwise we might get invalid PFP reads */
4865                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4866                amdgpu_ring_write(ring, 0x0);
4867                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4868                amdgpu_ring_write(ring, 0);
4869                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4870                amdgpu_ring_write(ring, 0);
4871        }
4872}
4873
4874static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4875{
4876        return ring->adev->wb.wb[ring->rptr_offs];
4877}
4878
4879static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4880{
4881        return ring->adev->wb.wb[ring->wptr_offs];
4882}
4883
4884static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4885{
4886        struct amdgpu_device *adev = ring->adev;
4887
4888        /* XXX check if swapping is necessary on BE */
4889        adev->wb.wb[ring->wptr_offs] = ring->wptr;
4890        WDOORBELL32(ring->doorbell_index, ring->wptr);
4891}
4892
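/**
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * @ring: amdgpu ring buffer object
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_* bits selecting 64 bit writes and interrupts
 *
 * Emits a RELEASE_MEM packet that flushes and writes back the caches,
 * writes the sequence number and optionally raises an interrupt.
 */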
4893static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4894                                             u64 addr, u64 seq,
4895                                             unsigned flags)
4896{
4897        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4898        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4899
4900        /* RELEASE_MEM - flush caches, send int */
4901        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4902        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4903                                 EOP_TC_ACTION_EN |
4904                                 EOP_TC_WB_ACTION_EN |
4905                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4906                                 EVENT_INDEX(5)));
4907        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4908        amdgpu_ring_write(ring, addr & 0xfffffffc);
4909        amdgpu_ring_write(ring, upper_32_bits(addr));
4910        amdgpu_ring_write(ring, lower_32_bits(seq));
4911        amdgpu_ring_write(ring, upper_32_bits(seq));
4912}
4913
4914static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4915                                                 enum amdgpu_interrupt_state state)
4916{
4917        u32 cp_int_cntl;
4918
4919        switch (state) {
4920        case AMDGPU_IRQ_STATE_DISABLE:
4921                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4922                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4923                                            TIME_STAMP_INT_ENABLE, 0);
4924                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4925                break;
4926        case AMDGPU_IRQ_STATE_ENABLE:
4927                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4928                cp_int_cntl =
4929                        REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4930                                      TIME_STAMP_INT_ENABLE, 1);
4931                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4932                break;
4933        default:
4934                break;
4935        }
4936}
4937
4938static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4939                                                     int me, int pipe,
4940                                                     enum amdgpu_interrupt_state state)
4941{
4942        u32 mec_int_cntl, mec_int_cntl_reg;
4943
4944        /*
4945         * amdgpu controls only pipe 0 of MEC1. That's why this function only
4946         * handles the setting of interrupts for this specific pipe. All other
4947         * pipes' interrupts are set by amdkfd.
4948         */
4949
4950        if (me == 1) {
4951                switch (pipe) {
4952                case 0:
4953                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4954                        break;
4955                default:
4956                        DRM_DEBUG("invalid pipe %d\n", pipe);
4957                        return;
4958                }
4959        } else {
4960                DRM_DEBUG("invalid me %d\n", me);
4961                return;
4962        }
4963
4964        switch (state) {
4965        case AMDGPU_IRQ_STATE_DISABLE:
4966                mec_int_cntl = RREG32(mec_int_cntl_reg);
4967                mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4968                                             TIME_STAMP_INT_ENABLE, 0);
4969                WREG32(mec_int_cntl_reg, mec_int_cntl);
4970                break;
4971        case AMDGPU_IRQ_STATE_ENABLE:
4972                mec_int_cntl = RREG32(mec_int_cntl_reg);
4973                mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4974                                             TIME_STAMP_INT_ENABLE, 1);
4975                WREG32(mec_int_cntl_reg, mec_int_cntl);
4976                break;
4977        default:
4978                break;
4979        }
4980}
4981
4982static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4983                                             struct amdgpu_irq_src *source,
4984                                             unsigned type,
4985                                             enum amdgpu_interrupt_state state)
4986{
4987        u32 cp_int_cntl;
4988
4989        switch (state) {
4990        case AMDGPU_IRQ_STATE_DISABLE:
4991                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4992                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4993                                            PRIV_REG_INT_ENABLE, 0);
4994                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4995                break;
4996        case AMDGPU_IRQ_STATE_ENABLE:
4997                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4998                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4999                                            PRIV_REG_INT_ENABLE, 1);
5000                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5001                break;
5002        default:
5003                break;
5004        }
5005
5006        return 0;
5007}
5008
5009static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5010                                              struct amdgpu_irq_src *source,
5011                                              unsigned type,
5012                                              enum amdgpu_interrupt_state state)
5013{
5014        u32 cp_int_cntl;
5015
5016        switch (state) {
5017        case AMDGPU_IRQ_STATE_DISABLE:
5018                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5019                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5020                                            PRIV_INSTR_INT_ENABLE, 0);
5021                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5022                break;
5023        case AMDGPU_IRQ_STATE_ENABLE:
5024                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5025                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5026                                            PRIV_INSTR_INT_ENABLE, 1);
5027                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5028                break;
5029        default:
5030                break;
5031        }
5032
5033        return 0;
5034}
5035
5036static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5037                                            struct amdgpu_irq_src *src,
5038                                            unsigned type,
5039                                            enum amdgpu_interrupt_state state)
5040{
5041        switch (type) {
5042        case AMDGPU_CP_IRQ_GFX_EOP:
5043                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5044                break;
5045        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5046                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5047                break;
5048        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5049                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5050                break;
5051        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5052                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5053                break;
5054        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5055                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5056                break;
5057        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5058                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5059                break;
5060        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5061                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5062                break;
5063        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5064                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5065                break;
5066        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5067                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5068                break;
5069        default:
5070                break;
5071        }
5072        return 0;
5073}
5074
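/**
 * gfx_v8_0_eop_irq - CP end-of-pipe interrupt handler
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source descriptor
 * @entry: decoded interrupt vector entry
 *
 * Decodes me/pipe/queue from the ring id in the IV entry and signals
 * fence completion on the matching gfx or compute ring.
 */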
5075static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5076                            struct amdgpu_irq_src *source,
5077                            struct amdgpu_iv_entry *entry)
5078{
5079        int i;
5080        u8 me_id, pipe_id, queue_id;
5081        struct amdgpu_ring *ring;
5082
5083        DRM_DEBUG("IH: CP EOP\n");
5084        me_id = (entry->ring_id & 0x0c) >> 2;
5085        pipe_id = (entry->ring_id & 0x03) >> 0;
5086        queue_id = (entry->ring_id & 0x70) >> 4;
5087
5088        switch (me_id) {
5089        case 0:
5090                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5091                break;
5092        case 1:
5093        case 2:
5094                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5095                        ring = &adev->gfx.compute_ring[i];
5096                        /* Per-queue interrupt is supported for MEC starting from VI.
5097                         * The interrupt can only be enabled/disabled per pipe instead of per queue.
5098                         */
5099                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5100                                amdgpu_fence_process(ring);
5101                }
5102                break;
5103        }
5104        return 0;
5105}
5106
5107static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5108                                 struct amdgpu_irq_src *source,
5109                                 struct amdgpu_iv_entry *entry)
5110{
5111        DRM_ERROR("Illegal register access in command stream\n");
5112        schedule_work(&adev->reset_work);
5113        return 0;
5114}
5115
5116static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5117                                  struct amdgpu_irq_src *source,
5118                                  struct amdgpu_iv_entry *entry)
5119{
5120        DRM_ERROR("Illegal instruction in command stream\n");
5121        schedule_work(&adev->reset_work);
5122        return 0;
5123}
5124
5125const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5126        .early_init = gfx_v8_0_early_init,
5127        .late_init = gfx_v8_0_late_init,
5128        .sw_init = gfx_v8_0_sw_init,
5129        .sw_fini = gfx_v8_0_sw_fini,
5130        .hw_init = gfx_v8_0_hw_init,
5131        .hw_fini = gfx_v8_0_hw_fini,
5132        .suspend = gfx_v8_0_suspend,
5133        .resume = gfx_v8_0_resume,
5134        .is_idle = gfx_v8_0_is_idle,
5135        .wait_for_idle = gfx_v8_0_wait_for_idle,
5136        .soft_reset = gfx_v8_0_soft_reset,
5137        .print_status = gfx_v8_0_print_status,
5138        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5139        .set_powergating_state = gfx_v8_0_set_powergating_state,
5140};
5141
5142static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5143        .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5144        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5145        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5146        .parse_cs = NULL,
5147        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5148        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5149        .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5150        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5151        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5152        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5153        .test_ring = gfx_v8_0_ring_test_ring,
5154        .test_ib = gfx_v8_0_ring_test_ib,
5155        .insert_nop = amdgpu_ring_insert_nop,
5156};
5157
5158static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5159        .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5160        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5161        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5162        .parse_cs = NULL,
5163        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5164        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5165        .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5166        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5167        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5168        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5169        .test_ring = gfx_v8_0_ring_test_ring,
5170        .test_ib = gfx_v8_0_ring_test_ib,
5171        .insert_nop = amdgpu_ring_insert_nop,
5172};
5173
5174static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5175{
5176        int i;
5177
5178        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5179                adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5180
5181        for (i = 0; i < adev->gfx.num_compute_rings; i++)
5182                adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5183}
5184
5185static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5186        .set = gfx_v8_0_set_eop_interrupt_state,
5187        .process = gfx_v8_0_eop_irq,
5188};
5189
5190static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5191        .set = gfx_v8_0_set_priv_reg_fault_state,
5192        .process = gfx_v8_0_priv_reg_irq,
5193};
5194
5195static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5196        .set = gfx_v8_0_set_priv_inst_fault_state,
5197        .process = gfx_v8_0_priv_inst_irq,
5198};
5199
5200static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5201{
5202        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5203        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5204
5205        adev->gfx.priv_reg_irq.num_types = 1;
5206        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5207
5208        adev->gfx.priv_inst_irq.num_types = 1;
5209        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5210}
5211
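/**
 * gfx_v8_0_set_gds_init - initialize the GDS partitioning info
 *
 * @adev: amdgpu_device pointer
 *
 * Reads the total GDS memory size from the hardware and selects the
 * per-client partition sizes for GDS memory, GWS and OA.
 */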
5212static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5213{
5214        /* init asic gds info */
5215        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5216        adev->gds.gws.total_size = 64;
5217        adev->gds.oa.total_size = 16;
5218
5219        if (adev->gds.mem.total_size == 64 * 1024) {
5220                adev->gds.mem.gfx_partition_size = 4096;
5221                adev->gds.mem.cs_partition_size = 4096;
5222
5223                adev->gds.gws.gfx_partition_size = 4;
5224                adev->gds.gws.cs_partition_size = 4;
5225
5226                adev->gds.oa.gfx_partition_size = 4;
5227                adev->gds.oa.cs_partition_size = 1;
5228        } else {
5229                adev->gds.mem.gfx_partition_size = 1024;
5230                adev->gds.mem.cs_partition_size = 1024;
5231
5232                adev->gds.gws.gfx_partition_size = 16;
5233                adev->gds.gws.cs_partition_size = 16;
5234
5235                adev->gds.oa.gfx_partition_size = 4;
5236                adev->gds.oa.cs_partition_size = 4;
5237        }
5238}
5239
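/**
 * gfx_v8_0_get_cu_active_bitmap - active CU bitmap for one SE/SH
 *
 * @adev: amdgpu_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Combines the hardware and user shader array config registers and
 * returns a bitmap with one bit set for each active CU.
 */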
5240static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
5241                u32 se, u32 sh)
5242{
5243        u32 mask = 0, tmp, tmp1;
5244        int i;
5245
5246        gfx_v8_0_select_se_sh(adev, se, sh);
5247        tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5248        tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5249        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5250
5251        tmp &= 0xffff0000;
5252
5253        tmp |= tmp1;
5254        tmp >>= 16;
5255
5256        for (i = 0; i < adev->gfx.config.max_cu_per_sh; i++) {
5257                mask <<= 1;
5258                mask |= 1;
5259        }
5260
5261        return (~tmp) & mask;
5262}
5263
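/**
 * gfx_v8_0_get_cu_info - gather the active CU configuration
 *
 * @adev: amdgpu_device pointer
 * @cu_info: structure receiving the per SE/SH CU bitmaps
 *
 * Walks all shader engines and arrays, records the active CU bitmap for
 * each, counts the total number of active CUs and builds the always-on
 * (ao) CU mask.
 */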
5264int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5265                                                 struct amdgpu_cu_info *cu_info)
5266{
5267        int i, j, k, counter, active_cu_number = 0;
5268        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5269
5270        if (!adev || !cu_info)
5271                return -EINVAL;
5272
5273        mutex_lock(&adev->grbm_idx_mutex);
5274        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5275                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5276                        mask = 1;
5277                        ao_bitmap = 0;
5278                        counter = 0;
5279                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
5280                        cu_info->bitmap[i][j] = bitmap;
5281
5282                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5283                                if (bitmap & mask) {
5284                                        if (counter < 2)
5285                                                ao_bitmap |= mask;
5286                                        counter++;
5287                                }
5288                                mask <<= 1;
5289                        }
5290                        active_cu_number += counter;
5291                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5292                }
5293        }
5294
5295        cu_info->number = active_cu_number;
5296        cu_info->ao_cu_mask = ao_cu_mask;
5297        mutex_unlock(&adev->grbm_idx_mutex);
5298        return 0;
5299}
5300