linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23#include <linux/firmware.h>
  24#include "drmP.h"
  25#include "amdgpu.h"
  26#include "amdgpu_gfx.h"
  27#include "vi.h"
  28#include "vid.h"
  29#include "amdgpu_ucode.h"
  30#include "amdgpu_atombios.h"
  31#include "atombios_i2c.h"
  32#include "clearstate_vi.h"
  33
  34#include "gmc/gmc_8_2_d.h"
  35#include "gmc/gmc_8_2_sh_mask.h"
  36
  37#include "oss/oss_3_0_d.h"
  38#include "oss/oss_3_0_sh_mask.h"
  39
  40#include "bif/bif_5_0_d.h"
  41#include "bif/bif_5_0_sh_mask.h"
  42
  43#include "gca/gfx_8_0_d.h"
  44#include "gca/gfx_8_0_enum.h"
  45#include "gca/gfx_8_0_sh_mask.h"
  46#include "gca/gfx_8_0_enum.h"
  47
  48#include "dce/dce_10_0_d.h"
  49#include "dce/dce_10_0_sh_mask.h"
  50
  51#include "smu/smu_7_1_3_d.h"
  52
   /* Ring counts: one graphics ring and eight compute rings. */
   53#define GFX8_NUM_GFX_RINGS     1
   54#define GFX8_NUM_COMPUTE_RINGS 8
  55
   /*
    * Per-ASIC "golden" GB_ADDR_CONFIG values.  Topaz and Carrizo share the
    * same value.  The same constants appear as the mmGB_ADDR_CONFIG entry
    * of the iceland, cz, polaris11 and tonga *_golden_common_all tables in
    * this file.
    */
   56#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
   57#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
   58#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
   59#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  60
   /*
    * Field-placement helpers: each macro shifts its argument left by the
    * matching GB_TILE_MODE0 / GB_MACROTILE_MODE0 *__SHIFT constant so the
    * results can be OR-ed together into a register value.  No masking is
    * applied, so an argument wider than its field is not truncated here.
    */
   61#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
   62#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
   63#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
   64#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
   65#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
   66#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
   67#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
   68#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
   69#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  70
   /*
    * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, occupying bits 0 to 5:
    * CPF, RLC, MGCG, CGCG, CGLS and GRBM respectively.
    */
   71#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
   72#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
   73#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
   74#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
   75#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
   76#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  77
   78/* BPM SERDES command codes: SET is 1, CLE is 0 (CLE presumably means "clear" - confirm). */
   79#define SET_BPM_SERDES_CMD    1
   80#define CLE_BPM_SERDES_CMD    0
  81
   82/* BPM register addresses, numbered consecutively from 0; BPM_REG_FGCG_MAX is the final enumerator (value 5). */
   83enum {
   84        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
   85        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
   86        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
   87        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
   88        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
   89        BPM_REG_FGCG_MAX
   90};
  91
   /* Length constant for the RLC "direct register list" format; units and consumer are not visible in this part of the file - TODO confirm. */
   92#define RLC_FormatDirectRegListLength        14
  93
   /*
    * Firmware images declared for this module, grouped per ASIC family:
    * carrizo, stoney, tonga, topaz, fiji, polaris11 and polaris10.  Every
    * family lists ce, pfp, me, mec and rlc images; stoney and topaz do not
    * list a mec2 image.
    */
   94MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
   95MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
   96MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
   97MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
   98MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
   99MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
  100
  101MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
  102MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
  103MODULE_FIRMWARE("amdgpu/stoney_me.bin");
  104MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
  105MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
  106
  107MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
  108MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
  109MODULE_FIRMWARE("amdgpu/tonga_me.bin");
  110MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
  111MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
  112MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
  113
  114MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
  115MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
  116MODULE_FIRMWARE("amdgpu/topaz_me.bin");
  117MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
  118MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
  119
  120MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
  121MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
  122MODULE_FIRMWARE("amdgpu/fiji_me.bin");
  123MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
  124MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
  125MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
  126
  127MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
  128MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
  129MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
  130MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
  131MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
  132MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
  133
  134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
  135MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
  136MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
  137MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
  138MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
  139MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 140
  /*
   * GDS register offsets for VMID 0 through 15, one entry per VMID.  Each
   * entry lists, in order, the BASE, SIZE, GWS and OA register for that
   * VMID; the array index equals the VMID number.
   */
  141static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
  142{
  143        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
  144        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
  145        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
  146        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
  147        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
  148        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
  149        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
  150        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
  151        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
  152        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
  153        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
  154        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
  155        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
  156        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
  157        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
  158        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
  159};
 160
  /*
   * Tonga (a11) golden register settings.
   * Flat u32 list laid out three words per row: a register offset (mm*)
   * followed by two 32-bit words - presumably an AND mask and the value
   * to apply; confirm against amdgpu_program_register_sequence().
   */
  161static const u32 golden_settings_tonga_a11[] =
  162{
  163        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
  164        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  165        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  166        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  167        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  168        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
  169        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  170        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  171        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  172        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  173        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  174        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
  175        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
  176        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
  177        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  178};
 179
  /*
   * Tonga common golden settings: GRBM index, raster config, address
   * config and SPI resource-reserve registers.  Three u32 words per row
   * (register offset, then two words - presumably mask and value).  The
   * mmGB_ADDR_CONFIG value equals TONGA_GB_ADDR_CONFIG_GOLDEN.
   */
  180static const u32 tonga_golden_common_all[] =
  181{
  182        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  183        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
  184        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
  185        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  186        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  187        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  188        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
  189        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
  190};
 191
  /*
   * Tonga MGCG/CGCG (clock gating) init table.  Three u32 words per row
   * (register offset, then two words - presumably mask and value).
   * Layout: mmRLC_CGTT_MGCG_OVERRIDE first, then an mmGRBM_GFX_INDEX row
   * (0xe0000000) ahead of the CGTT_* clock-control rows, a second
   * mmGRBM_GFX_INDEX row ahead of the per-CU CGTS_CU0..CU7 rows, and
   * finally the SM control, WPTR poll, CGCG/CGLS and CP memory sleep
   * rows.  Row order is kept exactly as in the original table.
   */
  192static const u32 tonga_mgcg_cgcg_init[] =
  193{
  194        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  195        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  196        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  197        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  198        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  199        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  200        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
  201        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  202        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  203        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  204        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  205        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  206        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  207        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  208        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  209        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  210        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  211        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  212        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  213        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  214        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  215        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  216        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  217        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  218        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  219        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  220        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  221        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  222        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  223        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  224        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  225        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  226        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  227        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  228        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  229        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  230        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  231        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  232        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  233        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  234        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  235        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  236        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  237        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  238        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  239        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  240        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  241        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  242        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  243        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  244        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  245        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  246        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  247        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  248        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  249        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  250        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  251        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  252        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  253        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  254        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  255        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  256        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  257        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
  258        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  259        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  260        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  261        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  262        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
  263        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  264        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  265        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  266        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  267        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  268        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  269};
 270
  /*
   * Polaris11 (a11) golden register settings.  Three u32 words per row:
   * register offset, then two words - presumably an AND mask and the
   * value to apply; confirm against amdgpu_program_register_sequence().
   */
  271static const u32 golden_settings_polaris11_a11[] =
  272{
  273        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
  274        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  275        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  276        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  277        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  278        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
  279        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
  280        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  281        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  282        mmSQ_CONFIG, 0x07f80000, 0x07180000,
  283        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  284        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  285        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
  286        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  287        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
  288        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  289};
 290
  /*
   * Polaris11 common golden settings: GRBM index, address config and SPI
   * resource-reserve registers (no raster-config rows in this table).
   * Three u32 words per row (register offset, then two words - presumably
   * mask and value).  The mmGB_ADDR_CONFIG value equals
   * POLARIS11_GB_ADDR_CONFIG_GOLDEN.
   */
  291static const u32 polaris11_golden_common_all[] =
  292{
  293        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  294        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
  295        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  296        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  297        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
  298        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
  299};
 300
  /*
   * Polaris10 (a11) golden register settings.  Three u32 words per row:
   * register offset, then two words - presumably an AND mask and the
   * value to apply; confirm against amdgpu_program_register_sequence().
   *
   * NOTE(review): the mmCB_HW_CONTROL_2 row has 0 as its second word, and
   * this table has no mmTCP_CHAN_STEER_LO row although the polaris11
   * table does - confirm both are intended.
   */
  301static const u32 golden_settings_polaris10_a11[] =
  302{
  303        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
  304        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
  305        mmCB_HW_CONTROL_2, 0, 0x0f000000,
  306        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  307        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  308        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  309        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  310        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
  311        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
  312        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  313        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  314        mmSQ_CONFIG, 0x07f80000, 0x07180000,
  315        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  316        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  317        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
  318        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  319        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  320};
 321
  /*
   * Polaris10 common golden settings: GRBM index, raster config, address
   * config and SPI resource-reserve registers.  Three u32 words per row
   * (register offset, then two words - presumably mask and value).  The
   * rows carry the same values as tonga_golden_common_all.
   */
  322static const u32 polaris10_golden_common_all[] =
  323{
  324        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  325        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
  326        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
  327        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  328        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  329        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  330        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
  331        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
  332};
 333
  /*
   * Fiji common golden settings.  Three u32 words per row (register
   * offset, then two words - presumably mask and value).  Besides the
   * GRBM index, raster config, address config and SPI resource-reserve
   * rows, this table ends with a second mmGRBM_GFX_INDEX row followed by
   * an mmSPI_CONFIG_CNTL_1 row.
   */
  334static const u32 fiji_golden_common_all[] =
  335{
  336        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  337        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
  338        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
  339        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  340        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  341        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  342        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
  343        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
  344        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  345        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
  346};
 347
  /*
   * Fiji (a10) golden register settings.  Three u32 words per row:
   * register offset, then two words - presumably an AND mask and the
   * value to apply; confirm against amdgpu_program_register_sequence().
   */
  348static const u32 golden_settings_fiji_a10[] =
  349{
  350        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  351        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  352        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  353        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  354        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  355        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  356        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  357        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  358        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  359        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
  360        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  361};
 362
  /*
   * Fiji MGCG/CGCG (clock gating) init table; it is the first table
   * gfx_v8_0_init_golden_registers() passes to
   * amdgpu_program_register_sequence() for CHIP_FIJI.  Three u32 words
   * per row (register offset, then two words - presumably mask and
   * value).  Unlike the tonga/iceland/cz variants in this file, there are
   * no per-CU CGTS_CUn_* rows: the second mmGRBM_GFX_INDEX row is followed
   * directly by mmCGTS_SM_CTRL_REG.
   */
  363static const u32 fiji_mgcg_cgcg_init[] =
  364{
  365        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  366        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  367        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  368        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  369        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  370        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  371        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
  372        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  373        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  374        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  375        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  376        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  377        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  378        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  379        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  380        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  381        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  382        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  383        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  384        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  385        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  386        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  387        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  388        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  389        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  390        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  391        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  392        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  393        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  394        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  395        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  396        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  397        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  398        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  399        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  400};
 401
  /*
   * Iceland (a11) golden register settings; for CHIP_TOPAZ,
   * gfx_v8_0_init_golden_registers() programs this table second, after
   * iceland_mgcg_cgcg_init.  Three u32 words per row: register offset,
   * then two words - presumably an AND mask and the value to apply;
   * confirm against amdgpu_program_register_sequence().
   */
  402static const u32 golden_settings_iceland_a11[] =
  403{
  404        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  405        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  406        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
  407        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  408        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  409        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  410        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
  411        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
  412        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  413        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  414        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  415        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  416        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
  417        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  418        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
  419};
 420
  /*
   * Iceland common golden settings; for CHIP_TOPAZ,
   * gfx_v8_0_init_golden_registers() programs this table last of the
   * three iceland tables.  Three u32 words per row (register offset, then
   * two words - presumably mask and value).  The mmGB_ADDR_CONFIG value
   * equals TOPAZ_GB_ADDR_CONFIG_GOLDEN.
   */
  421static const u32 iceland_golden_common_all[] =
  422{
  423        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  424        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
  425        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
  426        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
  427        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  428        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  429        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
  430        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
  431};
 432
  /*
   * Iceland MGCG/CGCG (clock gating) init table; it is the first table
   * gfx_v8_0_init_golden_registers() programs for CHIP_TOPAZ.  Three u32
   * words per row (register offset, then two words - presumably mask and
   * value).  Per-CU CGTS rows cover CU0..CU5 only, and the table ends at
   * mmRLC_CGCG_CGLS_CTRL with no mmCP_MEM_SLP_CNTL row.  Row order is
   * kept exactly as in the original table.
   */
  433static const u32 iceland_mgcg_cgcg_init[] =
  434{
  435        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  436        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  437        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  438        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  439        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
  440        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
  441        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
  442        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  443        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  444        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  445        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  446        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  447        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  448        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  449        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  450        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  451        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  452        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  453        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  454        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  455        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  456        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  457        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
  458        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  459        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  460        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  461        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  462        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  463        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  464        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  465        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  466        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  467        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  468        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
  469        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  470        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  471        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  472        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  473        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  474        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  475        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  476        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  477        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  478        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  479        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  480        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  481        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  482        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  483        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  484        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  485        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  486        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  487        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  488        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
  489        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  490        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  491        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  492        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  493        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  494        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  495        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  496        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  497        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  498        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  499};
 500
  /*
   * Carrizo ("cz", a11) golden register settings.  Three u32 words per
   * row: register offset, then two words - presumably an AND mask and the
   * value to apply; confirm against amdgpu_program_register_sequence().
   *
   * NOTE(review): the mmTCP_ADDR_CONFIG row pairs 0x0000000f with
   * 0x000000f3, i.e. the third word has bits set outside the second word
   * - confirm this is intended if the second word is a mask.
   */
  501static const u32 cz_golden_settings_a11[] =
  502{
  503        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  504        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  505        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  506        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
  507        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  508        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  509        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
  510        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  511        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
  512        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
  513};
 514
  /*
   * Carrizo ("cz") common golden settings: GRBM index, raster config,
   * address config and SPI resource-reserve registers.  Three u32 words
   * per row (register offset, then two words - presumably mask and
   * value).  The mmGB_ADDR_CONFIG value equals
   * CARRIZO_GB_ADDR_CONFIG_GOLDEN.
   */
  515static const u32 cz_golden_common_all[] =
  516{
  517        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  518        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
  519        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
  520        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
  521        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  522        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  523        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
  524        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
  525};
 526
  /*
   * Carrizo ("cz") MGCG/CGCG (clock gating) init table.  Three u32 words
   * per row (register offset, then two words - presumably mask and
   * value).  Layout: mmRLC_CGTT_MGCG_OVERRIDE first, an mmGRBM_GFX_INDEX
   * row ahead of the CGTT_* clock-control rows, a second mmGRBM_GFX_INDEX
   * row ahead of the per-CU CGTS_CU0..CU7 rows, then the SM control, WPTR
   * poll, CGCG/CGLS (0x0020003f here) and CP memory sleep rows.  Row
   * order is kept exactly as in the original table.
   */
  527static const u32 cz_mgcg_cgcg_init[] =
  528{
  529        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  530        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  531        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  532        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  533        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  534        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  535        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
  536        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  537        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  538        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  539        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  540        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  541        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  542        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  543        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  544        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  545        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  546        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  547        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  548        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  549        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  550        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  551        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  552        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  553        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  554        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  555        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  556        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  557        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  558        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  559        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  560        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  561        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  562        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  563        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  564        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  565        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  566        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  567        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  568        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  569        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  570        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  571        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  572        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  573        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  574        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  575        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  576        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  577        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  578        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  579        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  580        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  581        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  582        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  583        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  584        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  585        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  586        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  587        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  588        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  589        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  590        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  591        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  592        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
  593        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  594        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  595        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  596        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  597        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
  598        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  599        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  600        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  601        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  602        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
  603        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  604};
 605
  /*
   * Stoney (a11) golden register settings.  Three u32 words per row:
   * register offset, then two words - presumably an AND mask and the
   * value to apply; confirm against amdgpu_program_register_sequence().
   *
   * NOTE(review): the mmTCP_ADDR_CONFIG row pairs 0x0000000f with
   * 0x000000f1, i.e. the third word has bits set outside the second word
   * - confirm this is intended if the second word is a mask.
   */
  606static const u32 stoney_golden_settings_a11[] =
  607{
  608        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  609        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  610        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  611        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  612        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  613        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  614        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  615        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  616        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
  617        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
  618};
 619
  /*
   * Stoney common golden settings: GRBM index, raster config (both zero),
   * address config and SPI resource-reserve registers.  Three u32 words
   * per row (register offset, then two words - presumably mask and
   * value).  The mmGB_ADDR_CONFIG value here is 0x12010001, which has no
   * matching *_GB_ADDR_CONFIG_GOLDEN macro among those defined near the
   * top of this file.
   */
  620static const u32 stoney_golden_common_all[] =
  621{
  622        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  623        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
  624        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
  625        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
  626        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  627        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  628        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
  629        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
  630};
 631
 632static const u32 stoney_mgcg_cgcg_init[] =
 633{
 634        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 635        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 636        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 637        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 638        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 639        mmATC_MISC_CG, 0xffffffff, 0x000c0200,
 640};
 641
 642static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 643static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 644static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 645static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 646static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 647static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 648
 649static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 650{
 651        switch (adev->asic_type) {
 652        case CHIP_TOPAZ:
 653                amdgpu_program_register_sequence(adev,
 654                                                 iceland_mgcg_cgcg_init,
 655                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
 656                amdgpu_program_register_sequence(adev,
 657                                                 golden_settings_iceland_a11,
 658                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
 659                amdgpu_program_register_sequence(adev,
 660                                                 iceland_golden_common_all,
 661                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
 662                break;
 663        case CHIP_FIJI:
 664                amdgpu_program_register_sequence(adev,
 665                                                 fiji_mgcg_cgcg_init,
 666                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
 667                amdgpu_program_register_sequence(adev,
 668                                                 golden_settings_fiji_a10,
 669                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
 670                amdgpu_program_register_sequence(adev,
 671                                                 fiji_golden_common_all,
 672                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
 673                break;
 674
 675        case CHIP_TONGA:
 676                amdgpu_program_register_sequence(adev,
 677                                                 tonga_mgcg_cgcg_init,
 678                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
 679                amdgpu_program_register_sequence(adev,
 680                                                 golden_settings_tonga_a11,
 681                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
 682                amdgpu_program_register_sequence(adev,
 683                                                 tonga_golden_common_all,
 684                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
 685                break;
 686        case CHIP_POLARIS11:
 687                amdgpu_program_register_sequence(adev,
 688                                                 golden_settings_polaris11_a11,
 689                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
 690                amdgpu_program_register_sequence(adev,
 691                                                 polaris11_golden_common_all,
 692                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
 693                break;
 694        case CHIP_POLARIS10:
 695                amdgpu_program_register_sequence(adev,
 696                                                 golden_settings_polaris10_a11,
 697                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
 698                amdgpu_program_register_sequence(adev,
 699                                                 polaris10_golden_common_all,
 700                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
 701                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 702                if (adev->pdev->revision == 0xc7) {
 703                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 704                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 705                }
 706                break;
 707        case CHIP_CARRIZO:
 708                amdgpu_program_register_sequence(adev,
 709                                                 cz_mgcg_cgcg_init,
 710                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
 711                amdgpu_program_register_sequence(adev,
 712                                                 cz_golden_settings_a11,
 713                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
 714                amdgpu_program_register_sequence(adev,
 715                                                 cz_golden_common_all,
 716                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
 717                break;
 718        case CHIP_STONEY:
 719                amdgpu_program_register_sequence(adev,
 720                                                 stoney_mgcg_cgcg_init,
 721                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
 722                amdgpu_program_register_sequence(adev,
 723                                                 stoney_golden_settings_a11,
 724                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
 725                amdgpu_program_register_sequence(adev,
 726                                                 stoney_golden_common_all,
 727                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
 728                break;
 729        default:
 730                break;
 731        }
 732}
 733
 734static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 735{
 736        int i;
 737
 738        adev->gfx.scratch.num_reg = 7;
 739        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 740        for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
 741                adev->gfx.scratch.free[i] = true;
 742                adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
 743        }
 744}
 745
 746static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 747{
 748        struct amdgpu_device *adev = ring->adev;
 749        uint32_t scratch;
 750        uint32_t tmp = 0;
 751        unsigned i;
 752        int r;
 753
 754        r = amdgpu_gfx_scratch_get(adev, &scratch);
 755        if (r) {
 756                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 757                return r;
 758        }
 759        WREG32(scratch, 0xCAFEDEAD);
 760        r = amdgpu_ring_alloc(ring, 3);
 761        if (r) {
 762                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 763                          ring->idx, r);
 764                amdgpu_gfx_scratch_free(adev, scratch);
 765                return r;
 766        }
 767        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 768        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 769        amdgpu_ring_write(ring, 0xDEADBEEF);
 770        amdgpu_ring_commit(ring);
 771
 772        for (i = 0; i < adev->usec_timeout; i++) {
 773                tmp = RREG32(scratch);
 774                if (tmp == 0xDEADBEEF)
 775                        break;
 776                DRM_UDELAY(1);
 777        }
 778        if (i < adev->usec_timeout) {
 779                DRM_INFO("ring test on %d succeeded in %d usecs\n",
 780                         ring->idx, i);
 781        } else {
 782                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 783                          ring->idx, scratch, tmp);
 784                r = -EINVAL;
 785        }
 786        amdgpu_gfx_scratch_free(adev, scratch);
 787        return r;
 788}
 789
 790static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 791{
 792        struct amdgpu_device *adev = ring->adev;
 793        struct amdgpu_ib ib;
 794        struct fence *f = NULL;
 795        uint32_t scratch;
 796        uint32_t tmp = 0;
 797        unsigned i;
 798        int r;
 799
 800        r = amdgpu_gfx_scratch_get(adev, &scratch);
 801        if (r) {
 802                DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
 803                return r;
 804        }
 805        WREG32(scratch, 0xCAFEDEAD);
 806        memset(&ib, 0, sizeof(ib));
 807        r = amdgpu_ib_get(adev, NULL, 256, &ib);
 808        if (r) {
 809                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 810                goto err1;
 811        }
 812        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
 813        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
 814        ib.ptr[2] = 0xDEADBEEF;
 815        ib.length_dw = 3;
 816
 817        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 818        if (r)
 819                goto err2;
 820
 821        r = fence_wait(f, false);
 822        if (r) {
 823                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
 824                goto err2;
 825        }
 826        for (i = 0; i < adev->usec_timeout; i++) {
 827                tmp = RREG32(scratch);
 828                if (tmp == 0xDEADBEEF)
 829                        break;
 830                DRM_UDELAY(1);
 831        }
 832        if (i < adev->usec_timeout) {
 833                DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
 834                         ring->idx, i);
 835                goto err2;
 836        } else {
 837                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 838                          scratch, tmp);
 839                r = -EINVAL;
 840        }
 841err2:
 842        fence_put(f);
 843        amdgpu_ib_free(adev, &ib, NULL);
 844        fence_put(f);
 845err1:
 846        amdgpu_gfx_scratch_free(adev, scratch);
 847        return r;
 848}
 849
 850
 851static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
 852        release_firmware(adev->gfx.pfp_fw);
 853        adev->gfx.pfp_fw = NULL;
 854        release_firmware(adev->gfx.me_fw);
 855        adev->gfx.me_fw = NULL;
 856        release_firmware(adev->gfx.ce_fw);
 857        adev->gfx.ce_fw = NULL;
 858        release_firmware(adev->gfx.rlc_fw);
 859        adev->gfx.rlc_fw = NULL;
 860        release_firmware(adev->gfx.mec_fw);
 861        adev->gfx.mec_fw = NULL;
 862        if ((adev->asic_type != CHIP_STONEY) &&
 863            (adev->asic_type != CHIP_TOPAZ))
 864                release_firmware(adev->gfx.mec2_fw);
 865        adev->gfx.mec2_fw = NULL;
 866
 867        kfree(adev->gfx.rlc.register_list_format);
 868}
 869
 870static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 871{
 872        const char *chip_name;
 873        char fw_name[30];
 874        int err;
 875        struct amdgpu_firmware_info *info = NULL;
 876        const struct common_firmware_header *header = NULL;
 877        const struct gfx_firmware_header_v1_0 *cp_hdr;
 878        const struct rlc_firmware_header_v2_0 *rlc_hdr;
 879        unsigned int *tmp = NULL, i;
 880
 881        DRM_DEBUG("\n");
 882
 883        switch (adev->asic_type) {
 884        case CHIP_TOPAZ:
 885                chip_name = "topaz";
 886                break;
 887        case CHIP_TONGA:
 888                chip_name = "tonga";
 889                break;
 890        case CHIP_CARRIZO:
 891                chip_name = "carrizo";
 892                break;
 893        case CHIP_FIJI:
 894                chip_name = "fiji";
 895                break;
 896        case CHIP_POLARIS11:
 897                chip_name = "polaris11";
 898                break;
 899        case CHIP_POLARIS10:
 900                chip_name = "polaris10";
 901                break;
 902        case CHIP_STONEY:
 903                chip_name = "stoney";
 904                break;
 905        default:
 906                BUG();
 907        }
 908
 909        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 910        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 911        if (err)
 912                goto out;
 913        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 914        if (err)
 915                goto out;
 916        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 917        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 918        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 919
 920        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 921        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 922        if (err)
 923                goto out;
 924        err = amdgpu_ucode_validate(adev->gfx.me_fw);
 925        if (err)
 926                goto out;
 927        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 928        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 929        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 930
 931        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 932        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 933        if (err)
 934                goto out;
 935        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 936        if (err)
 937                goto out;
 938        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 939        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 940        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 941
 942        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 943        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 944        if (err)
 945                goto out;
 946        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 947        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 948        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 949        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
 950
 951        adev->gfx.rlc.save_and_restore_offset =
 952                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
 953        adev->gfx.rlc.clear_state_descriptor_offset =
 954                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
 955        adev->gfx.rlc.avail_scratch_ram_locations =
 956                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
 957        adev->gfx.rlc.reg_restore_list_size =
 958                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
 959        adev->gfx.rlc.reg_list_format_start =
 960                        le32_to_cpu(rlc_hdr->reg_list_format_start);
 961        adev->gfx.rlc.reg_list_format_separate_start =
 962                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
 963        adev->gfx.rlc.starting_offsets_start =
 964                        le32_to_cpu(rlc_hdr->starting_offsets_start);
 965        adev->gfx.rlc.reg_list_format_size_bytes =
 966                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
 967        adev->gfx.rlc.reg_list_size_bytes =
 968                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
 969
 970        adev->gfx.rlc.register_list_format =
 971                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
 972                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
 973
 974        if (!adev->gfx.rlc.register_list_format) {
 975                err = -ENOMEM;
 976                goto out;
 977        }
 978
 979        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
 980                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
 981        for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
 982                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
 983
 984        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
 985
 986        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
 987                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
 988        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
 989                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
 990
 991        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 992        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 993        if (err)
 994                goto out;
 995        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
 996        if (err)
 997                goto out;
 998        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 999        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1000        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1001
1002        if ((adev->asic_type != CHIP_STONEY) &&
1003            (adev->asic_type != CHIP_TOPAZ)) {
1004                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1005                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1006                if (!err) {
1007                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1008                        if (err)
1009                                goto out;
1010                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1011                                adev->gfx.mec2_fw->data;
1012                        adev->gfx.mec2_fw_version =
1013                                le32_to_cpu(cp_hdr->header.ucode_version);
1014                        adev->gfx.mec2_feature_version =
1015                                le32_to_cpu(cp_hdr->ucode_feature_version);
1016                } else {
1017                        err = 0;
1018                        adev->gfx.mec2_fw = NULL;
1019                }
1020        }
1021
1022        if (adev->firmware.smu_load) {
1023                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1024                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1025                info->fw = adev->gfx.pfp_fw;
1026                header = (const struct common_firmware_header *)info->fw->data;
1027                adev->firmware.fw_size +=
1028                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1029
1030                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1031                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1032                info->fw = adev->gfx.me_fw;
1033                header = (const struct common_firmware_header *)info->fw->data;
1034                adev->firmware.fw_size +=
1035                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1036
1037                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1038                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1039                info->fw = adev->gfx.ce_fw;
1040                header = (const struct common_firmware_header *)info->fw->data;
1041                adev->firmware.fw_size +=
1042                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1043
1044                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1045                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1046                info->fw = adev->gfx.rlc_fw;
1047                header = (const struct common_firmware_header *)info->fw->data;
1048                adev->firmware.fw_size +=
1049                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050
1051                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1052                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1053                info->fw = adev->gfx.mec_fw;
1054                header = (const struct common_firmware_header *)info->fw->data;
1055                adev->firmware.fw_size +=
1056                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057
1058                if (adev->gfx.mec2_fw) {
1059                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1060                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1061                        info->fw = adev->gfx.mec2_fw;
1062                        header = (const struct common_firmware_header *)info->fw->data;
1063                        adev->firmware.fw_size +=
1064                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065                }
1066
1067        }
1068
1069out:
1070        if (err) {
1071                dev_err(adev->dev,
1072                        "gfx8: Failed to load firmware \"%s\"\n",
1073                        fw_name);
1074                release_firmware(adev->gfx.pfp_fw);
1075                adev->gfx.pfp_fw = NULL;
1076                release_firmware(adev->gfx.me_fw);
1077                adev->gfx.me_fw = NULL;
1078                release_firmware(adev->gfx.ce_fw);
1079                adev->gfx.ce_fw = NULL;
1080                release_firmware(adev->gfx.rlc_fw);
1081                adev->gfx.rlc_fw = NULL;
1082                release_firmware(adev->gfx.mec_fw);
1083                adev->gfx.mec_fw = NULL;
1084                release_firmware(adev->gfx.mec2_fw);
1085                adev->gfx.mec2_fw = NULL;
1086        }
1087        return err;
1088}
1089
1090static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1091                                    volatile u32 *buffer)
1092{
1093        u32 count = 0, i;
1094        const struct cs_section_def *sect = NULL;
1095        const struct cs_extent_def *ext = NULL;
1096
1097        if (adev->gfx.rlc.cs_data == NULL)
1098                return;
1099        if (buffer == NULL)
1100                return;
1101
1102        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1103        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1104
1105        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1106        buffer[count++] = cpu_to_le32(0x80000000);
1107        buffer[count++] = cpu_to_le32(0x80000000);
1108
1109        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1110                for (ext = sect->section; ext->extent != NULL; ++ext) {
1111                        if (sect->id == SECT_CONTEXT) {
1112                                buffer[count++] =
1113                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1114                                buffer[count++] = cpu_to_le32(ext->reg_index -
1115                                                PACKET3_SET_CONTEXT_REG_START);
1116                                for (i = 0; i < ext->reg_count; i++)
1117                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
1118                        } else {
1119                                return;
1120                        }
1121                }
1122        }
1123
1124        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1125        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1126                        PACKET3_SET_CONTEXT_REG_START);
1127        switch (adev->asic_type) {
1128        case CHIP_TONGA:
1129        case CHIP_POLARIS10:
1130                buffer[count++] = cpu_to_le32(0x16000012);
1131                buffer[count++] = cpu_to_le32(0x0000002A);
1132                break;
1133        case CHIP_POLARIS11:
1134                buffer[count++] = cpu_to_le32(0x16000012);
1135                buffer[count++] = cpu_to_le32(0x00000000);
1136                break;
1137        case CHIP_FIJI:
1138                buffer[count++] = cpu_to_le32(0x3a00161a);
1139                buffer[count++] = cpu_to_le32(0x0000002e);
1140                break;
1141        case CHIP_TOPAZ:
1142        case CHIP_CARRIZO:
1143                buffer[count++] = cpu_to_le32(0x00000002);
1144                buffer[count++] = cpu_to_le32(0x00000000);
1145                break;
1146        case CHIP_STONEY:
1147                buffer[count++] = cpu_to_le32(0x00000000);
1148                buffer[count++] = cpu_to_le32(0x00000000);
1149                break;
1150        default:
1151                buffer[count++] = cpu_to_le32(0x00000000);
1152                buffer[count++] = cpu_to_le32(0x00000000);
1153                break;
1154        }
1155
1156        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1157        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1158
1159        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1160        buffer[count++] = cpu_to_le32(0);
1161}
1162
1163static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1164{
1165        int r;
1166
1167        /* clear state block */
1168        if (adev->gfx.rlc.clear_state_obj) {
1169                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1170                if (unlikely(r != 0))
1171                        dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1172                amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1173                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1174
1175                amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1176                adev->gfx.rlc.clear_state_obj = NULL;
1177        }
1178}
1179
1180static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1181{
1182        volatile u32 *dst_ptr;
1183        u32 dws;
1184        const struct cs_section_def *cs_data;
1185        int r;
1186
1187        adev->gfx.rlc.cs_data = vi_cs_data;
1188
1189        cs_data = adev->gfx.rlc.cs_data;
1190
1191        if (cs_data) {
1192                /* clear state block */
1193                adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1194
1195                if (adev->gfx.rlc.clear_state_obj == NULL) {
1196                        r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1197                                             AMDGPU_GEM_DOMAIN_VRAM,
1198                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1199                                             NULL, NULL,
1200                                             &adev->gfx.rlc.clear_state_obj);
1201                        if (r) {
1202                                dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1203                                gfx_v8_0_rlc_fini(adev);
1204                                return r;
1205                        }
1206                }
1207                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1208                if (unlikely(r != 0)) {
1209                        gfx_v8_0_rlc_fini(adev);
1210                        return r;
1211                }
1212                r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1213                                  &adev->gfx.rlc.clear_state_gpu_addr);
1214                if (r) {
1215                        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1216                        dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1217                        gfx_v8_0_rlc_fini(adev);
1218                        return r;
1219                }
1220
1221                r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1222                if (r) {
1223                        dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1224                        gfx_v8_0_rlc_fini(adev);
1225                        return r;
1226                }
1227                /* set up the cs buffer */
1228                dst_ptr = adev->gfx.rlc.cs_ptr;
1229                gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1230                amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1231                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1232        }
1233
1234        return 0;
1235}
1236
1237static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1238{
1239        int r;
1240
1241        if (adev->gfx.mec.hpd_eop_obj) {
1242                r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1243                if (unlikely(r != 0))
1244                        dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1245                amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1246                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1247
1248                amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1249                adev->gfx.mec.hpd_eop_obj = NULL;
1250        }
1251}
1252
1253#define MEC_HPD_SIZE 2048
1254
1255static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1256{
1257        int r;
1258        u32 *hpd;
1259
1260        /*
1261         * we assign only 1 pipe because all other pipes will
1262         * be handled by KFD
1263         */
1264        adev->gfx.mec.num_mec = 1;
1265        adev->gfx.mec.num_pipe = 1;
1266        adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1267
1268        if (adev->gfx.mec.hpd_eop_obj == NULL) {
1269                r = amdgpu_bo_create(adev,
1270                                     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1271                                     PAGE_SIZE, true,
1272                                     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1273                                     &adev->gfx.mec.hpd_eop_obj);
1274                if (r) {
1275                        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1276                        return r;
1277                }
1278        }
1279
1280        r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1281        if (unlikely(r != 0)) {
1282                gfx_v8_0_mec_fini(adev);
1283                return r;
1284        }
1285        r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1286                          &adev->gfx.mec.hpd_eop_gpu_addr);
1287        if (r) {
1288                dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1289                gfx_v8_0_mec_fini(adev);
1290                return r;
1291        }
1292        r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1293        if (r) {
1294                dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1295                gfx_v8_0_mec_fini(adev);
1296                return r;
1297        }
1298
1299        memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1300
1301        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1302        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1303
1304        return 0;
1305}
1306
1307static const u32 vgpr_init_compute_shader[] =
1308{
1309        0x7e000209, 0x7e020208,
1310        0x7e040207, 0x7e060206,
1311        0x7e080205, 0x7e0a0204,
1312        0x7e0c0203, 0x7e0e0202,
1313        0x7e100201, 0x7e120200,
1314        0x7e140209, 0x7e160208,
1315        0x7e180207, 0x7e1a0206,
1316        0x7e1c0205, 0x7e1e0204,
1317        0x7e200203, 0x7e220202,
1318        0x7e240201, 0x7e260200,
1319        0x7e280209, 0x7e2a0208,
1320        0x7e2c0207, 0x7e2e0206,
1321        0x7e300205, 0x7e320204,
1322        0x7e340203, 0x7e360202,
1323        0x7e380201, 0x7e3a0200,
1324        0x7e3c0209, 0x7e3e0208,
1325        0x7e400207, 0x7e420206,
1326        0x7e440205, 0x7e460204,
1327        0x7e480203, 0x7e4a0202,
1328        0x7e4c0201, 0x7e4e0200,
1329        0x7e500209, 0x7e520208,
1330        0x7e540207, 0x7e560206,
1331        0x7e580205, 0x7e5a0204,
1332        0x7e5c0203, 0x7e5e0202,
1333        0x7e600201, 0x7e620200,
1334        0x7e640209, 0x7e660208,
1335        0x7e680207, 0x7e6a0206,
1336        0x7e6c0205, 0x7e6e0204,
1337        0x7e700203, 0x7e720202,
1338        0x7e740201, 0x7e760200,
1339        0x7e780209, 0x7e7a0208,
1340        0x7e7c0207, 0x7e7e0206,
1341        0xbf8a0000, 0xbf810000,
1342};
1343
1344static const u32 sgpr_init_compute_shader[] =
1345{
1346        0xbe8a0100, 0xbe8c0102,
1347        0xbe8e0104, 0xbe900106,
1348        0xbe920108, 0xbe940100,
1349        0xbe960102, 0xbe980104,
1350        0xbe9a0106, 0xbe9c0108,
1351        0xbe9e0100, 0xbea00102,
1352        0xbea20104, 0xbea40106,
1353        0xbea60108, 0xbea80100,
1354        0xbeaa0102, 0xbeac0104,
1355        0xbeae0106, 0xbeb00108,
1356        0xbeb20100, 0xbeb40102,
1357        0xbeb60104, 0xbeb80106,
1358        0xbeba0108, 0xbebc0100,
1359        0xbebe0102, 0xbec00104,
1360        0xbec20106, 0xbec40108,
1361        0xbec60100, 0xbec80102,
1362        0xbee60004, 0xbee70005,
1363        0xbeea0006, 0xbeeb0007,
1364        0xbee80008, 0xbee90009,
1365        0xbefc0000, 0xbf8a0000,
1366        0xbf810000, 0x00000000,
1367};
1368
1369static const u32 vgpr_init_regs[] =
1370{
1371        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1372        mmCOMPUTE_RESOURCE_LIMITS, 0,
1373        mmCOMPUTE_NUM_THREAD_X, 256*4,
1374        mmCOMPUTE_NUM_THREAD_Y, 1,
1375        mmCOMPUTE_NUM_THREAD_Z, 1,
1376        mmCOMPUTE_PGM_RSRC2, 20,
1377        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1378        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1379        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1380        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1381        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1382        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1383        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1384        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1385        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1386        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1387};
1388
1389static const u32 sgpr1_init_regs[] =
1390{
1391        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1392        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1393        mmCOMPUTE_NUM_THREAD_X, 256*5,
1394        mmCOMPUTE_NUM_THREAD_Y, 1,
1395        mmCOMPUTE_NUM_THREAD_Z, 1,
1396        mmCOMPUTE_PGM_RSRC2, 20,
1397        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1398        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1399        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1400        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1401        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1402        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1403        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1404        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1405        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1406        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1407};
1408
1409static const u32 sgpr2_init_regs[] =
1410{
1411        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1412        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1413        mmCOMPUTE_NUM_THREAD_X, 256*5,
1414        mmCOMPUTE_NUM_THREAD_Y, 1,
1415        mmCOMPUTE_NUM_THREAD_Z, 1,
1416        mmCOMPUTE_PGM_RSRC2, 20,
1417        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1418        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1419        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1420        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1421        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1422        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1423        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1424        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1425        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1426        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1427};
1428
1429static const u32 sec_ded_counter_registers[] =
1430{
1431        mmCPC_EDC_ATC_CNT,
1432        mmCPC_EDC_SCRATCH_CNT,
1433        mmCPC_EDC_UCODE_CNT,
1434        mmCPF_EDC_ATC_CNT,
1435        mmCPF_EDC_ROQ_CNT,
1436        mmCPF_EDC_TAG_CNT,
1437        mmCPG_EDC_ATC_CNT,
1438        mmCPG_EDC_DMA_CNT,
1439        mmCPG_EDC_TAG_CNT,
1440        mmDC_EDC_CSINVOC_CNT,
1441        mmDC_EDC_RESTORE_CNT,
1442        mmDC_EDC_STATE_CNT,
1443        mmGDS_EDC_CNT,
1444        mmGDS_EDC_GRBM_CNT,
1445        mmGDS_EDC_OA_DED,
1446        mmSPI_EDC_CNT,
1447        mmSQC_ATC_EDC_GATCL1_CNT,
1448        mmSQC_EDC_CNT,
1449        mmSQ_EDC_DED_CNT,
1450        mmSQ_EDC_INFO,
1451        mmSQ_EDC_SEC_CNT,
1452        mmTCC_EDC_CNT,
1453        mmTCP_ATC_EDC_GATCL1_CNT,
1454        mmTCP_EDC_CNT,
1455        mmTD_EDC_CNT
1456};
1457
1458static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1459{
1460        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1461        struct amdgpu_ib ib;
1462        struct fence *f = NULL;
1463        int r, i;
1464        u32 tmp;
1465        unsigned total_size, vgpr_offset, sgpr_offset;
1466        u64 gpu_addr;
1467
1468        /* only supported on CZ */
1469        if (adev->asic_type != CHIP_CARRIZO)
1470                return 0;
1471
1472        /* bail if the compute ring is not ready */
1473        if (!ring->ready)
1474                return 0;
1475
1476        tmp = RREG32(mmGB_EDC_MODE);
1477        WREG32(mmGB_EDC_MODE, 0);
1478
1479        total_size =
1480                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1481        total_size +=
1482                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1483        total_size +=
1484                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1485        total_size = ALIGN(total_size, 256);
1486        vgpr_offset = total_size;
1487        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1488        sgpr_offset = total_size;
1489        total_size += sizeof(sgpr_init_compute_shader);
1490
1491        /* allocate an indirect buffer to put the commands in */
1492        memset(&ib, 0, sizeof(ib));
1493        r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1494        if (r) {
1495                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1496                return r;
1497        }
1498
1499        /* load the compute shaders */
1500        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1501                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1502
1503        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1504                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1505
1506        /* init the ib length to 0 */
1507        ib.length_dw = 0;
1508
1509        /* VGPR */
1510        /* write the register state for the compute dispatch */
1511        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1512                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1513                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1514                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1515        }
1516        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1517        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1518        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1519        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1520        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1521        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1522
1523        /* write dispatch packet */
1524        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1525        ib.ptr[ib.length_dw++] = 8; /* x */
1526        ib.ptr[ib.length_dw++] = 1; /* y */
1527        ib.ptr[ib.length_dw++] = 1; /* z */
1528        ib.ptr[ib.length_dw++] =
1529                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1530
1531        /* write CS partial flush packet */
1532        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1533        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1534
1535        /* SGPR1 */
1536        /* write the register state for the compute dispatch */
1537        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1538                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1539                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1540                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1541        }
1542        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1543        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1544        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1545        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1546        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1547        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1548
1549        /* write dispatch packet */
1550        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1551        ib.ptr[ib.length_dw++] = 8; /* x */
1552        ib.ptr[ib.length_dw++] = 1; /* y */
1553        ib.ptr[ib.length_dw++] = 1; /* z */
1554        ib.ptr[ib.length_dw++] =
1555                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1556
1557        /* write CS partial flush packet */
1558        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1559        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1560
1561        /* SGPR2 */
1562        /* write the register state for the compute dispatch */
1563        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1564                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1565                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1566                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1567        }
1568        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1569        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1570        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1571        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1572        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1573        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1574
1575        /* write dispatch packet */
1576        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1577        ib.ptr[ib.length_dw++] = 8; /* x */
1578        ib.ptr[ib.length_dw++] = 1; /* y */
1579        ib.ptr[ib.length_dw++] = 1; /* z */
1580        ib.ptr[ib.length_dw++] =
1581                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1582
1583        /* write CS partial flush packet */
1584        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1585        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1586
1587        /* shedule the ib on the ring */
1588        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1589        if (r) {
1590                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1591                goto fail;
1592        }
1593
1594        /* wait for the GPU to finish processing the IB */
1595        r = fence_wait(f, false);
1596        if (r) {
1597                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1598                goto fail;
1599        }
1600
1601        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1602        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1603        WREG32(mmGB_EDC_MODE, tmp);
1604
1605        tmp = RREG32(mmCC_GC_EDC_CONFIG);
1606        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1607        WREG32(mmCC_GC_EDC_CONFIG, tmp);
1608
1609
1610        /* read back registers to clear the counters */
1611        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1612                RREG32(sec_ded_counter_registers[i]);
1613
1614fail:
1615        fence_put(f);
1616        amdgpu_ib_free(adev, &ib, NULL);
1617        fence_put(f);
1618
1619        return r;
1620}
1621
1622static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1623{
1624        u32 gb_addr_config;
1625        u32 mc_shared_chmap, mc_arb_ramcfg;
1626        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1627        u32 tmp;
1628        int ret;
1629
1630        switch (adev->asic_type) {
1631        case CHIP_TOPAZ:
1632                adev->gfx.config.max_shader_engines = 1;
1633                adev->gfx.config.max_tile_pipes = 2;
1634                adev->gfx.config.max_cu_per_sh = 6;
1635                adev->gfx.config.max_sh_per_se = 1;
1636                adev->gfx.config.max_backends_per_se = 2;
1637                adev->gfx.config.max_texture_channel_caches = 2;
1638                adev->gfx.config.max_gprs = 256;
1639                adev->gfx.config.max_gs_threads = 32;
1640                adev->gfx.config.max_hw_contexts = 8;
1641
1642                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1643                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1644                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1645                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1646                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1647                break;
1648        case CHIP_FIJI:
1649                adev->gfx.config.max_shader_engines = 4;
1650                adev->gfx.config.max_tile_pipes = 16;
1651                adev->gfx.config.max_cu_per_sh = 16;
1652                adev->gfx.config.max_sh_per_se = 1;
1653                adev->gfx.config.max_backends_per_se = 4;
1654                adev->gfx.config.max_texture_channel_caches = 16;
1655                adev->gfx.config.max_gprs = 256;
1656                adev->gfx.config.max_gs_threads = 32;
1657                adev->gfx.config.max_hw_contexts = 8;
1658
1659                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1660                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1661                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1662                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1663                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1664                break;
1665        case CHIP_POLARIS11:
1666                ret = amdgpu_atombios_get_gfx_info(adev);
1667                if (ret)
1668                        return ret;
1669                adev->gfx.config.max_gprs = 256;
1670                adev->gfx.config.max_gs_threads = 32;
1671                adev->gfx.config.max_hw_contexts = 8;
1672
1673                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1674                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1675                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1676                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1677                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1678                break;
1679        case CHIP_POLARIS10:
1680                ret = amdgpu_atombios_get_gfx_info(adev);
1681                if (ret)
1682                        return ret;
1683                adev->gfx.config.max_gprs = 256;
1684                adev->gfx.config.max_gs_threads = 32;
1685                adev->gfx.config.max_hw_contexts = 8;
1686
1687                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1688                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1689                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1690                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1691                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1692                break;
1693        case CHIP_TONGA:
1694                adev->gfx.config.max_shader_engines = 4;
1695                adev->gfx.config.max_tile_pipes = 8;
1696                adev->gfx.config.max_cu_per_sh = 8;
1697                adev->gfx.config.max_sh_per_se = 1;
1698                adev->gfx.config.max_backends_per_se = 2;
1699                adev->gfx.config.max_texture_channel_caches = 8;
1700                adev->gfx.config.max_gprs = 256;
1701                adev->gfx.config.max_gs_threads = 32;
1702                adev->gfx.config.max_hw_contexts = 8;
1703
1704                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1705                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1706                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1707                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1708                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1709                break;
1710        case CHIP_CARRIZO:
1711                adev->gfx.config.max_shader_engines = 1;
1712                adev->gfx.config.max_tile_pipes = 2;
1713                adev->gfx.config.max_sh_per_se = 1;
1714                adev->gfx.config.max_backends_per_se = 2;
1715
1716                switch (adev->pdev->revision) {
1717                case 0xc4:
1718                case 0x84:
1719                case 0xc8:
1720                case 0xcc:
1721                case 0xe1:
1722                case 0xe3:
1723                        /* B10 */
1724                        adev->gfx.config.max_cu_per_sh = 8;
1725                        break;
1726                case 0xc5:
1727                case 0x81:
1728                case 0x85:
1729                case 0xc9:
1730                case 0xcd:
1731                case 0xe2:
1732                case 0xe4:
1733                        /* B8 */
1734                        adev->gfx.config.max_cu_per_sh = 6;
1735                        break;
1736                case 0xc6:
1737                case 0xca:
1738                case 0xce:
1739                case 0x88:
1740                        /* B6 */
1741                        adev->gfx.config.max_cu_per_sh = 6;
1742                        break;
1743                case 0xc7:
1744                case 0x87:
1745                case 0xcb:
1746                case 0xe5:
1747                case 0x89:
1748                default:
1749                        /* B4 */
1750                        adev->gfx.config.max_cu_per_sh = 4;
1751                        break;
1752                }
1753
1754                adev->gfx.config.max_texture_channel_caches = 2;
1755                adev->gfx.config.max_gprs = 256;
1756                adev->gfx.config.max_gs_threads = 32;
1757                adev->gfx.config.max_hw_contexts = 8;
1758
1759                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1760                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1761                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1762                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1763                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1764                break;
1765        case CHIP_STONEY:
1766                adev->gfx.config.max_shader_engines = 1;
1767                adev->gfx.config.max_tile_pipes = 2;
1768                adev->gfx.config.max_sh_per_se = 1;
1769                adev->gfx.config.max_backends_per_se = 1;
1770
1771                switch (adev->pdev->revision) {
1772                case 0xc0:
1773                case 0xc1:
1774                case 0xc2:
1775                case 0xc4:
1776                case 0xc8:
1777                case 0xc9:
1778                        adev->gfx.config.max_cu_per_sh = 3;
1779                        break;
1780                case 0xd0:
1781                case 0xd1:
1782                case 0xd2:
1783                default:
1784                        adev->gfx.config.max_cu_per_sh = 2;
1785                        break;
1786                }
1787
1788                adev->gfx.config.max_texture_channel_caches = 2;
1789                adev->gfx.config.max_gprs = 256;
1790                adev->gfx.config.max_gs_threads = 16;
1791                adev->gfx.config.max_hw_contexts = 8;
1792
1793                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1794                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1795                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1796                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1797                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1798                break;
1799        default:
1800                adev->gfx.config.max_shader_engines = 2;
1801                adev->gfx.config.max_tile_pipes = 4;
1802                adev->gfx.config.max_cu_per_sh = 2;
1803                adev->gfx.config.max_sh_per_se = 1;
1804                adev->gfx.config.max_backends_per_se = 2;
1805                adev->gfx.config.max_texture_channel_caches = 4;
1806                adev->gfx.config.max_gprs = 256;
1807                adev->gfx.config.max_gs_threads = 32;
1808                adev->gfx.config.max_hw_contexts = 8;
1809
1810                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1811                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1812                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1813                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1814                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1815                break;
1816        }
1817
1818        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1819        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1820        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1821
1822        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1823        adev->gfx.config.mem_max_burst_length_bytes = 256;
1824        if (adev->flags & AMD_IS_APU) {
1825                /* Get memory bank mapping mode. */
1826                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1827                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1828                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1829
1830                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1831                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1832                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1833
1834                /* Validate settings in case only one DIMM installed. */
1835                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1836                        dimm00_addr_map = 0;
1837                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1838                        dimm01_addr_map = 0;
1839                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1840                        dimm10_addr_map = 0;
1841                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1842                        dimm11_addr_map = 0;
1843
1844                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1845                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1846                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1847                        adev->gfx.config.mem_row_size_in_kb = 2;
1848                else
1849                        adev->gfx.config.mem_row_size_in_kb = 1;
1850        } else {
1851                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1852                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1853                if (adev->gfx.config.mem_row_size_in_kb > 4)
1854                        adev->gfx.config.mem_row_size_in_kb = 4;
1855        }
1856
1857        adev->gfx.config.shader_engine_tile_size = 32;
1858        adev->gfx.config.num_gpus = 1;
1859        adev->gfx.config.multi_gpu_tile_size = 64;
1860
1861        /* fix up row size */
1862        switch (adev->gfx.config.mem_row_size_in_kb) {
1863        case 1:
1864        default:
1865                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1866                break;
1867        case 2:
1868                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1869                break;
1870        case 4:
1871                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1872                break;
1873        }
1874        adev->gfx.config.gb_addr_config = gb_addr_config;
1875
1876        return 0;
1877}
1878
1879static int gfx_v8_0_sw_init(void *handle)
1880{
1881        int i, r;
1882        struct amdgpu_ring *ring;
1883        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1884
1885        /* EOP Event */
1886        r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1887        if (r)
1888                return r;
1889
1890        /* Privileged reg */
1891        r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1892        if (r)
1893                return r;
1894
1895        /* Privileged inst */
1896        r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1897        if (r)
1898                return r;
1899
1900        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1901
1902        gfx_v8_0_scratch_init(adev);
1903
1904        r = gfx_v8_0_init_microcode(adev);
1905        if (r) {
1906                DRM_ERROR("Failed to load gfx firmware!\n");
1907                return r;
1908        }
1909
1910        r = gfx_v8_0_rlc_init(adev);
1911        if (r) {
1912                DRM_ERROR("Failed to init rlc BOs!\n");
1913                return r;
1914        }
1915
1916        r = gfx_v8_0_mec_init(adev);
1917        if (r) {
1918                DRM_ERROR("Failed to init MEC BOs!\n");
1919                return r;
1920        }
1921
1922        /* set up the gfx ring */
1923        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1924                ring = &adev->gfx.gfx_ring[i];
1925                ring->ring_obj = NULL;
1926                sprintf(ring->name, "gfx");
1927                /* no gfx doorbells on iceland */
1928                if (adev->asic_type != CHIP_TOPAZ) {
1929                        ring->use_doorbell = true;
1930                        ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1931                }
1932
1933                r = amdgpu_ring_init(adev, ring, 1024,
1934                                     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1935                                     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1936                                     AMDGPU_RING_TYPE_GFX);
1937                if (r)
1938                        return r;
1939        }
1940
1941        /* set up the compute queues */
1942        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1943                unsigned irq_type;
1944
1945                /* max 32 queues per MEC */
1946                if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1947                        DRM_ERROR("Too many (%d) compute rings!\n", i);
1948                        break;
1949                }
1950                ring = &adev->gfx.compute_ring[i];
1951                ring->ring_obj = NULL;
1952                ring->use_doorbell = true;
1953                ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1954                ring->me = 1; /* first MEC */
1955                ring->pipe = i / 8;
1956                ring->queue = i % 8;
1957                sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1958                irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1959                /* type-2 packets are deprecated on MEC, use type-3 instead */
1960                r = amdgpu_ring_init(adev, ring, 1024,
1961                                     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1962                                     &adev->gfx.eop_irq, irq_type,
1963                                     AMDGPU_RING_TYPE_COMPUTE);
1964                if (r)
1965                        return r;
1966        }
1967
1968        /* reserve GDS, GWS and OA resource for gfx */
1969        r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1970                        PAGE_SIZE, true,
1971                        AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1972                        NULL, &adev->gds.gds_gfx_bo);
1973        if (r)
1974                return r;
1975
1976        r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1977                PAGE_SIZE, true,
1978                AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1979                NULL, &adev->gds.gws_gfx_bo);
1980        if (r)
1981                return r;
1982
1983        r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1984                        PAGE_SIZE, true,
1985                        AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1986                        NULL, &adev->gds.oa_gfx_bo);
1987        if (r)
1988                return r;
1989
1990        adev->gfx.ce_ram_size = 0x8000;
1991
1992        r = gfx_v8_0_gpu_early_init(adev);
1993        if (r)
1994                return r;
1995
1996        return 0;
1997}
1998
1999static int gfx_v8_0_sw_fini(void *handle)
2000{
2001        int i;
2002        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2003
2004        amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2005        amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2006        amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2007
2008        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2009                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2010        for (i = 0; i < adev->gfx.num_compute_rings; i++)
2011                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2012
2013        gfx_v8_0_mec_fini(adev);
2014
2015        gfx_v8_0_rlc_fini(adev);
2016
2017        gfx_v8_0_free_microcode(adev);
2018
2019        return 0;
2020}
2021
2022static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2023{
2024        uint32_t *modearray, *mod2array;
2025        const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2026        const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2027        u32 reg_offset;
2028
2029        modearray = adev->gfx.config.tile_mode_array;
2030        mod2array = adev->gfx.config.macrotile_mode_array;
2031
2032        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2033                modearray[reg_offset] = 0;
2034
2035        for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2036                mod2array[reg_offset] = 0;
2037
2038        switch (adev->asic_type) {
2039        case CHIP_TOPAZ:
2040                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041                                PIPE_CONFIG(ADDR_SURF_P2) |
2042                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2043                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2044                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045                                PIPE_CONFIG(ADDR_SURF_P2) |
2046                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2047                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2048                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2049                                PIPE_CONFIG(ADDR_SURF_P2) |
2050                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2051                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2052                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2053                                PIPE_CONFIG(ADDR_SURF_P2) |
2054                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2055                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2056                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057                                PIPE_CONFIG(ADDR_SURF_P2) |
2058                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2059                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2060                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061                                PIPE_CONFIG(ADDR_SURF_P2) |
2062                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2063                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2064                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2065                                PIPE_CONFIG(ADDR_SURF_P2) |
2066                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2067                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2068                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2069                                PIPE_CONFIG(ADDR_SURF_P2));
2070                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2071                                PIPE_CONFIG(ADDR_SURF_P2) |
2072                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2073                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2075                                 PIPE_CONFIG(ADDR_SURF_P2) |
2076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2077                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2079                                 PIPE_CONFIG(ADDR_SURF_P2) |
2080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2082                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083                                 PIPE_CONFIG(ADDR_SURF_P2) |
2084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2085                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087                                 PIPE_CONFIG(ADDR_SURF_P2) |
2088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2089                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2091                                 PIPE_CONFIG(ADDR_SURF_P2) |
2092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2093                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2095                                 PIPE_CONFIG(ADDR_SURF_P2) |
2096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2097                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2098                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2099                                 PIPE_CONFIG(ADDR_SURF_P2) |
2100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2101                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2102                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2103                                 PIPE_CONFIG(ADDR_SURF_P2) |
2104                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2105                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2106                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2107                                 PIPE_CONFIG(ADDR_SURF_P2) |
2108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2109                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2110                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2111                                 PIPE_CONFIG(ADDR_SURF_P2) |
2112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2113                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2114                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2115                                 PIPE_CONFIG(ADDR_SURF_P2) |
2116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2117                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2118                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2119                                 PIPE_CONFIG(ADDR_SURF_P2) |
2120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2121                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2122                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2123                                 PIPE_CONFIG(ADDR_SURF_P2) |
2124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2125                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2126                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2127                                 PIPE_CONFIG(ADDR_SURF_P2) |
2128                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2129                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2130                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131                                 PIPE_CONFIG(ADDR_SURF_P2) |
2132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2133                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2134                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2137                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2139                                 PIPE_CONFIG(ADDR_SURF_P2) |
2140                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2142
2143                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2144                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2145                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2146                                NUM_BANKS(ADDR_SURF_8_BANK));
2147                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2148                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2149                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2150                                NUM_BANKS(ADDR_SURF_8_BANK));
2151                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2152                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2153                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2154                                NUM_BANKS(ADDR_SURF_8_BANK));
2155                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2158                                NUM_BANKS(ADDR_SURF_8_BANK));
2159                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2161                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2162                                NUM_BANKS(ADDR_SURF_8_BANK));
2163                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2164                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2165                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2166                                NUM_BANKS(ADDR_SURF_8_BANK));
2167                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2169                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                NUM_BANKS(ADDR_SURF_8_BANK));
2171                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2172                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2173                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2174                                NUM_BANKS(ADDR_SURF_16_BANK));
2175                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2176                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2177                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2178                                NUM_BANKS(ADDR_SURF_16_BANK));
2179                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2180                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2181                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2182                                 NUM_BANKS(ADDR_SURF_16_BANK));
2183                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2184                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2185                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2186                                 NUM_BANKS(ADDR_SURF_16_BANK));
2187                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2188                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2189                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2190                                 NUM_BANKS(ADDR_SURF_16_BANK));
2191                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2194                                 NUM_BANKS(ADDR_SURF_16_BANK));
2195                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198                                 NUM_BANKS(ADDR_SURF_8_BANK));
2199
2200                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2201                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2202                            reg_offset != 23)
2203                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2204
2205                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2206                        if (reg_offset != 7)
2207                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2208
2209                break;
2210        case CHIP_FIJI:
2211                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2213                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2214                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2215                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2216                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2217                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2218                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2219                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2221                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2222                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2223                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2225                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2226                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2229                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2230                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2232                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2233                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2234                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2236                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2237                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2238                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2240                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2241                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2244                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2245                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2246                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2247                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2248                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2249                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2251                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2252                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2254                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2257                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2258                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2259                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2261                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2262                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2264                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2265                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2270                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2274                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2277                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2279                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2283                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2287                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2291                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2295                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2302                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2303                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2304                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2306                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2310                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2314                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2316                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2317                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2328                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2329                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2331                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2332                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2333
2334                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337                                NUM_BANKS(ADDR_SURF_8_BANK));
2338                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2340                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341                                NUM_BANKS(ADDR_SURF_8_BANK));
2342                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345                                NUM_BANKS(ADDR_SURF_8_BANK));
2346                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2348                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349                                NUM_BANKS(ADDR_SURF_8_BANK));
2350                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2353                                NUM_BANKS(ADDR_SURF_8_BANK));
2354                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2356                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2357                                NUM_BANKS(ADDR_SURF_8_BANK));
2358                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2360                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2361                                NUM_BANKS(ADDR_SURF_8_BANK));
2362                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2364                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365                                NUM_BANKS(ADDR_SURF_8_BANK));
2366                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2367                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2369                                NUM_BANKS(ADDR_SURF_8_BANK));
2370                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2373                                 NUM_BANKS(ADDR_SURF_8_BANK));
2374                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2377                                 NUM_BANKS(ADDR_SURF_8_BANK));
2378                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381                                 NUM_BANKS(ADDR_SURF_8_BANK));
2382                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385                                 NUM_BANKS(ADDR_SURF_8_BANK));
2386                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2389                                 NUM_BANKS(ADDR_SURF_4_BANK));
2390
2391                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2392                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2393
2394                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2395                        if (reg_offset != 7)
2396                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2397
2398                break;
2399        case CHIP_TONGA:
2400                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2402                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2403                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2406                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2407                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2410                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2411                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2414                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2415                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2418                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2419                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2422                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2426                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2430                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2434                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2436                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2440                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2444                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2447                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2448                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2449                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2450                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2452                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2456                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2459                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2460                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2462                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2464                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2469                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2470                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2471                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2472                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2473                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2475                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2476                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2479                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2480                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2483                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2484                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2485                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2487                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2488                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2491                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2492                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2495                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2497                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2498                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2499                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2501                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2502                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2503                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2505                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2506                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2507                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2509                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2513                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2514                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2515                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2517                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2518                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2519                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2520                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2521                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2522
2523                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2525                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2526                                NUM_BANKS(ADDR_SURF_16_BANK));
2527                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2529                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2530                                NUM_BANKS(ADDR_SURF_16_BANK));
2531                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2533                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534                                NUM_BANKS(ADDR_SURF_16_BANK));
2535                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538                                NUM_BANKS(ADDR_SURF_16_BANK));
2539                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2541                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2542                                NUM_BANKS(ADDR_SURF_16_BANK));
2543                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2545                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2546                                NUM_BANKS(ADDR_SURF_16_BANK));
2547                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2549                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2550                                NUM_BANKS(ADDR_SURF_16_BANK));
2551                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2553                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554                                NUM_BANKS(ADDR_SURF_16_BANK));
2555                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2557                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558                                NUM_BANKS(ADDR_SURF_16_BANK));
2559                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562                                 NUM_BANKS(ADDR_SURF_16_BANK));
2563                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2566                                 NUM_BANKS(ADDR_SURF_16_BANK));
2567                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2569                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2570                                 NUM_BANKS(ADDR_SURF_8_BANK));
2571                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2573                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2574                                 NUM_BANKS(ADDR_SURF_4_BANK));
2575                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578                                 NUM_BANKS(ADDR_SURF_4_BANK));
2579
2580                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2581                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2582
2583                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2584                        if (reg_offset != 7)
2585                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2586
2587                break;
2588        case CHIP_POLARIS11:
2589                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2592                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2593                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2596                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2597                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2600                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2601                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2603                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2604                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2608                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2609                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2612                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2613                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2616                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2617                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2618                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2620                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2621                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2622                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
2623                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2625                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2634                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2635                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2636                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2637                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2638                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2639                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2646                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2648                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2650                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2656                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2659                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2660                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2661                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2662                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2663                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2664                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2665                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2666                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2667                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2668                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2669                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2670                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2671                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2672                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2674                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2675                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2676                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2677                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2678                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2679                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2680                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2681                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2682                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2683                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2684                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2685                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2686                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2687                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2688                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2690                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2691                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2692                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2694                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2695                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2696                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2698                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2702                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2706                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2707                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2710                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2711
2712                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2714                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2715                                NUM_BANKS(ADDR_SURF_16_BANK));
2716
2717                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2719                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2720                                NUM_BANKS(ADDR_SURF_16_BANK));
2721
2722                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2725                                NUM_BANKS(ADDR_SURF_16_BANK));
2726
2727                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2729                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730                                NUM_BANKS(ADDR_SURF_16_BANK));
2731
2732                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735                                NUM_BANKS(ADDR_SURF_16_BANK));
2736
2737                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2740                                NUM_BANKS(ADDR_SURF_16_BANK));
2741
2742                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2744                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2745                                NUM_BANKS(ADDR_SURF_16_BANK));
2746
2747                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2748                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2749                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750                                NUM_BANKS(ADDR_SURF_16_BANK));
2751
2752                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2753                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2754                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2755                                NUM_BANKS(ADDR_SURF_16_BANK));
2756
2757                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2759                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2760                                NUM_BANKS(ADDR_SURF_16_BANK));
2761
2762                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2765                                NUM_BANKS(ADDR_SURF_16_BANK));
2766
2767                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2770                                NUM_BANKS(ADDR_SURF_16_BANK));
2771
2772                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2774                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2775                                NUM_BANKS(ADDR_SURF_8_BANK));
2776
2777                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2779                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2780                                NUM_BANKS(ADDR_SURF_4_BANK));
2781
2782                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2783                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2784
2785                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2786                        if (reg_offset != 7)
2787                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2788
2789                break;
2790        case CHIP_POLARIS10:
2791                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2793                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2794                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2797                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2798                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2799                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2801                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2802                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2803                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2805                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2806                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2809                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2810                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2811                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2813                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2814                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2815                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2817                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2818                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2822                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2824                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2825                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2826                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2827                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2828                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2829                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2831                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2834                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2835                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2837                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2841                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2842                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2843                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2844                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2847                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2850                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2851                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2852                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2854                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2855                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2857                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2860                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2861                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2862                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2863                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2864                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2865                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2866                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2867                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2868                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2869                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2870                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2871                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2872                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2873                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2874                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2875                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2876                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2877                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2878                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2879                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2880                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2881                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2882                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2884                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2885                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2886                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2888                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2889                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2890                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2892                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2893                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2894                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2896                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2897                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2901                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2904                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2906                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2908                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2909                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913
2914                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2916                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                NUM_BANKS(ADDR_SURF_16_BANK));
2918
2919                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2920                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2921                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922                                NUM_BANKS(ADDR_SURF_16_BANK));
2923
2924                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2926                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2927                                NUM_BANKS(ADDR_SURF_16_BANK));
2928
2929                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2931                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2932                                NUM_BANKS(ADDR_SURF_16_BANK));
2933
2934                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2935                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2936                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2937                                NUM_BANKS(ADDR_SURF_16_BANK));
2938
2939                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2940                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2941                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2942                                NUM_BANKS(ADDR_SURF_16_BANK));
2943
2944                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2947                                NUM_BANKS(ADDR_SURF_16_BANK));
2948
2949                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2950                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2951                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2952                                NUM_BANKS(ADDR_SURF_16_BANK));
2953
2954                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2956                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2957                                NUM_BANKS(ADDR_SURF_16_BANK));
2958
2959                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2961                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2962                                NUM_BANKS(ADDR_SURF_16_BANK));
2963
2964                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2966                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2967                                NUM_BANKS(ADDR_SURF_16_BANK));
2968
2969                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2970                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2971                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2972                                NUM_BANKS(ADDR_SURF_8_BANK));
2973
2974                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2976                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2977                                NUM_BANKS(ADDR_SURF_4_BANK));
2978
2979                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2982                                NUM_BANKS(ADDR_SURF_4_BANK));
2983
2984                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2985                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2986
2987                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2988                        if (reg_offset != 7)
2989                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2990
2991                break;
2992        case CHIP_STONEY:
2993                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2994                                PIPE_CONFIG(ADDR_SURF_P2) |
2995                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2996                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2997                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                PIPE_CONFIG(ADDR_SURF_P2) |
2999                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3000                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002                                PIPE_CONFIG(ADDR_SURF_P2) |
3003                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3004                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006                                PIPE_CONFIG(ADDR_SURF_P2) |
3007                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3008                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010                                PIPE_CONFIG(ADDR_SURF_P2) |
3011                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3012                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014                                PIPE_CONFIG(ADDR_SURF_P2) |
3015                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018                                PIPE_CONFIG(ADDR_SURF_P2) |
3019                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3022                                PIPE_CONFIG(ADDR_SURF_P2));
3023                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024                                PIPE_CONFIG(ADDR_SURF_P2) |
3025                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3026                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3027                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028                                 PIPE_CONFIG(ADDR_SURF_P2) |
3029                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3030                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3031                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3032                                 PIPE_CONFIG(ADDR_SURF_P2) |
3033                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3035                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3036                                 PIPE_CONFIG(ADDR_SURF_P2) |
3037                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040                                 PIPE_CONFIG(ADDR_SURF_P2) |
3041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3042                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3043                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3044                                 PIPE_CONFIG(ADDR_SURF_P2) |
3045                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3046                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3047                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3048                                 PIPE_CONFIG(ADDR_SURF_P2) |
3049                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3051                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3052                                 PIPE_CONFIG(ADDR_SURF_P2) |
3053                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3055                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3056                                 PIPE_CONFIG(ADDR_SURF_P2) |
3057                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3058                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3059                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3060                                 PIPE_CONFIG(ADDR_SURF_P2) |
3061                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3062                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3063                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3064                                 PIPE_CONFIG(ADDR_SURF_P2) |
3065                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3066                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3067                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3068                                 PIPE_CONFIG(ADDR_SURF_P2) |
3069                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3070                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3072                                 PIPE_CONFIG(ADDR_SURF_P2) |
3073                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3074                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3076                                 PIPE_CONFIG(ADDR_SURF_P2) |
3077                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3080                                 PIPE_CONFIG(ADDR_SURF_P2) |
3081                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3084                                 PIPE_CONFIG(ADDR_SURF_P2) |
3085                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3086                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3087                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3088                                 PIPE_CONFIG(ADDR_SURF_P2) |
3089                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3090                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3091                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3092                                 PIPE_CONFIG(ADDR_SURF_P2) |
3093                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3094                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3095
3096                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3097                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3098                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3099                                NUM_BANKS(ADDR_SURF_8_BANK));
3100                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3102                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3103                                NUM_BANKS(ADDR_SURF_8_BANK));
3104                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3105                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3106                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3107                                NUM_BANKS(ADDR_SURF_8_BANK));
3108                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3110                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3111                                NUM_BANKS(ADDR_SURF_8_BANK));
3112                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3115                                NUM_BANKS(ADDR_SURF_8_BANK));
3116                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3118                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3119                                NUM_BANKS(ADDR_SURF_8_BANK));
3120                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3122                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3123                                NUM_BANKS(ADDR_SURF_8_BANK));
3124                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3125                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3126                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3127                                NUM_BANKS(ADDR_SURF_16_BANK));
3128                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3129                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3130                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131                                NUM_BANKS(ADDR_SURF_16_BANK));
3132                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3133                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3134                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3135                                 NUM_BANKS(ADDR_SURF_16_BANK));
3136                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3137                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3138                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3139                                 NUM_BANKS(ADDR_SURF_16_BANK));
3140                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3143                                 NUM_BANKS(ADDR_SURF_16_BANK));
3144                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3146                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147                                 NUM_BANKS(ADDR_SURF_16_BANK));
3148                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3151                                 NUM_BANKS(ADDR_SURF_8_BANK));
3152
3153                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3154                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3155                            reg_offset != 23)
3156                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3157
3158                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3159                        if (reg_offset != 7)
3160                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3161
3162                break;
3163        default:
3164                dev_warn(adev->dev,
3165                         "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3166                         adev->asic_type);
3167
3168        case CHIP_CARRIZO:
3169                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170                                PIPE_CONFIG(ADDR_SURF_P2) |
3171                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3172                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174                                PIPE_CONFIG(ADDR_SURF_P2) |
3175                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3176                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178                                PIPE_CONFIG(ADDR_SURF_P2) |
3179                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3180                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182                                PIPE_CONFIG(ADDR_SURF_P2) |
3183                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3184                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3186                                PIPE_CONFIG(ADDR_SURF_P2) |
3187                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3188                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3189                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3190                                PIPE_CONFIG(ADDR_SURF_P2) |
3191                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3192                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3193                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3194                                PIPE_CONFIG(ADDR_SURF_P2) |
3195                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3196                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3197                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198                                PIPE_CONFIG(ADDR_SURF_P2));
3199                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200                                PIPE_CONFIG(ADDR_SURF_P2) |
3201                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3202                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204                                 PIPE_CONFIG(ADDR_SURF_P2) |
3205                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3206                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3208                                 PIPE_CONFIG(ADDR_SURF_P2) |
3209                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3210                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3211                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3212                                 PIPE_CONFIG(ADDR_SURF_P2) |
3213                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216                                 PIPE_CONFIG(ADDR_SURF_P2) |
3217                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3219                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3220                                 PIPE_CONFIG(ADDR_SURF_P2) |
3221                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3222                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224                                 PIPE_CONFIG(ADDR_SURF_P2) |
3225                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3226                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3227                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3228                                 PIPE_CONFIG(ADDR_SURF_P2) |
3229                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3230                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3232                                 PIPE_CONFIG(ADDR_SURF_P2) |
3233                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3236                                 PIPE_CONFIG(ADDR_SURF_P2) |
3237                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3238                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3240                                 PIPE_CONFIG(ADDR_SURF_P2) |
3241                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3242                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3244                                 PIPE_CONFIG(ADDR_SURF_P2) |
3245                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3248                                 PIPE_CONFIG(ADDR_SURF_P2) |
3249                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3251                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3252                                 PIPE_CONFIG(ADDR_SURF_P2) |
3253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3254                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3255                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3256                                 PIPE_CONFIG(ADDR_SURF_P2) |
3257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3258                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3259                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3260                                 PIPE_CONFIG(ADDR_SURF_P2) |
3261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3262                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264                                 PIPE_CONFIG(ADDR_SURF_P2) |
3265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268                                 PIPE_CONFIG(ADDR_SURF_P2) |
3269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3270                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271
3272                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3274                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3275                                NUM_BANKS(ADDR_SURF_8_BANK));
3276                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3278                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279                                NUM_BANKS(ADDR_SURF_8_BANK));
3280                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283                                NUM_BANKS(ADDR_SURF_8_BANK));
3284                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287                                NUM_BANKS(ADDR_SURF_8_BANK));
3288                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3289                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3290                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3291                                NUM_BANKS(ADDR_SURF_8_BANK));
3292                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3293                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3294                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3295                                NUM_BANKS(ADDR_SURF_8_BANK));
3296                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3297                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3298                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3299                                NUM_BANKS(ADDR_SURF_8_BANK));
3300                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3301                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3302                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303                                NUM_BANKS(ADDR_SURF_16_BANK));
3304                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3305                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                NUM_BANKS(ADDR_SURF_16_BANK));
3308                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3309                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3310                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311                                 NUM_BANKS(ADDR_SURF_16_BANK));
3312                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3313                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3314                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315                                 NUM_BANKS(ADDR_SURF_16_BANK));
3316                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319                                 NUM_BANKS(ADDR_SURF_16_BANK));
3320                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3323                                 NUM_BANKS(ADDR_SURF_16_BANK));
3324                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327                                 NUM_BANKS(ADDR_SURF_8_BANK));
3328
3329                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3330                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3331                            reg_offset != 23)
3332                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3333
3334                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3335                        if (reg_offset != 7)
3336                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3337
3338                break;
3339        }
3340}
3341
3342void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3343{
3344        u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3345
3346        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3347                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3348                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3349        } else if (se_num == 0xffffffff) {
3350                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3351                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3352        } else if (sh_num == 0xffffffff) {
3353                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3354                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3355        } else {
3356                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3357                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3358        }
3359        WREG32(mmGRBM_GFX_INDEX, data);
3360}
3361
3362static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3363{
3364        return (u32)((1ULL << bit_width) - 1);
3365}
3366
3367static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3368{
3369        u32 data, mask;
3370
3371        data = RREG32(mmCC_RB_BACKEND_DISABLE);
3372        data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3373
3374        data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3375        data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3376
3377        mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3378                                       adev->gfx.config.max_sh_per_se);
3379
3380        return (~data) & mask;
3381}
3382
3383static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3384{
3385        int i, j;
3386        u32 data;
3387        u32 active_rbs = 0;
3388        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3389                                        adev->gfx.config.max_sh_per_se;
3390
3391        mutex_lock(&adev->grbm_idx_mutex);
3392        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3393                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3394                        gfx_v8_0_select_se_sh(adev, i, j);
3395                        data = gfx_v8_0_get_rb_active_bitmap(adev);
3396                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3397                                               rb_bitmap_width_per_sh);
3398                }
3399        }
3400        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3401        mutex_unlock(&adev->grbm_idx_mutex);
3402
3403        adev->gfx.config.backend_enable_mask = active_rbs;
3404        adev->gfx.config.num_rbs = hweight32(active_rbs);
3405}
3406
3407/**
3408 * gfx_v8_0_init_compute_vmid - gart enable
3409 *
3410 * @rdev: amdgpu_device pointer
3411 *
3412 * Initialize compute vmid sh_mem registers
3413 *
3414 */
3415#define DEFAULT_SH_MEM_BASES    (0x6000)
3416#define FIRST_COMPUTE_VMID      (8)
3417#define LAST_COMPUTE_VMID       (16)
3418static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3419{
3420        int i;
3421        uint32_t sh_mem_config;
3422        uint32_t sh_mem_bases;
3423
3424        /*
3425         * Configure apertures:
3426         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3427         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3428         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3429         */
3430        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3431
3432        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3433                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3434                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3435                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3436                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3437                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3438
3439        mutex_lock(&adev->srbm_mutex);
3440        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3441                vi_srbm_select(adev, 0, 0, 0, i);
3442                /* CP and shaders */
3443                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3444                WREG32(mmSH_MEM_APE1_BASE, 1);
3445                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3446                WREG32(mmSH_MEM_BASES, sh_mem_bases);
3447        }
3448        vi_srbm_select(adev, 0, 0, 0, 0);
3449        mutex_unlock(&adev->srbm_mutex);
3450}
3451
3452static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3453{
3454        u32 tmp;
3455        int i;
3456
3457        tmp = RREG32(mmGRBM_CNTL);
3458        tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3459        WREG32(mmGRBM_CNTL, tmp);
3460
3461        WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3462        WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3463        WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3464
3465        gfx_v8_0_tiling_mode_table_init(adev);
3466
3467        gfx_v8_0_setup_rb(adev);
3468        gfx_v8_0_get_cu_info(adev);
3469
3470        /* XXX SH_MEM regs */
3471        /* where to put LDS, scratch, GPUVM in FSA64 space */
3472        mutex_lock(&adev->srbm_mutex);
3473        for (i = 0; i < 16; i++) {
3474                vi_srbm_select(adev, 0, 0, 0, i);
3475                /* CP and shaders */
3476                if (i == 0) {
3477                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3478                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3479                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3480                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3481                        WREG32(mmSH_MEM_CONFIG, tmp);
3482                } else {
3483                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3484                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3485                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3486                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3487                        WREG32(mmSH_MEM_CONFIG, tmp);
3488                }
3489
3490                WREG32(mmSH_MEM_APE1_BASE, 1);
3491                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3492                WREG32(mmSH_MEM_BASES, 0);
3493        }
3494        vi_srbm_select(adev, 0, 0, 0, 0);
3495        mutex_unlock(&adev->srbm_mutex);
3496
3497        gfx_v8_0_init_compute_vmid(adev);
3498
3499        mutex_lock(&adev->grbm_idx_mutex);
3500        /*
3501         * making sure that the following register writes will be broadcasted
3502         * to all the shaders
3503         */
3504        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3505
3506        WREG32(mmPA_SC_FIFO_SIZE,
3507                   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3508                        PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3509                   (adev->gfx.config.sc_prim_fifo_size_backend <<
3510                        PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3511                   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3512                        PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3513                   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3514                        PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3515        mutex_unlock(&adev->grbm_idx_mutex);
3516
3517}
3518
3519static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3520{
3521        u32 i, j, k;
3522        u32 mask;
3523
3524        mutex_lock(&adev->grbm_idx_mutex);
3525        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3526                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3527                        gfx_v8_0_select_se_sh(adev, i, j);
3528                        for (k = 0; k < adev->usec_timeout; k++) {
3529                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3530                                        break;
3531                                udelay(1);
3532                        }
3533                }
3534        }
3535        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3536        mutex_unlock(&adev->grbm_idx_mutex);
3537
3538        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3539                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3540                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3541                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3542        for (k = 0; k < adev->usec_timeout; k++) {
3543                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3544                        break;
3545                udelay(1);
3546        }
3547}
3548
3549static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3550                                               bool enable)
3551{
3552        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3553
3554        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3555        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3556        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3557        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3558
3559        WREG32(mmCP_INT_CNTL_RING0, tmp);
3560}
3561
3562static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3563{
3564        /* csib */
3565        WREG32(mmRLC_CSIB_ADDR_HI,
3566                        adev->gfx.rlc.clear_state_gpu_addr >> 32);
3567        WREG32(mmRLC_CSIB_ADDR_LO,
3568                        adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3569        WREG32(mmRLC_CSIB_LENGTH,
3570                        adev->gfx.rlc.clear_state_size);
3571}
3572
/**
 * gfx_v8_0_parse_ind_reg_list - index the indirect part of a register list
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: dword offset at which parsing starts
 * @list_size: total number of dwords in @register_list_format
 * @unique_indices: output array of distinct index values found
 * @indices_count: in/out number of valid entries in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: output array with the start offset of every group
 * @offset_count: in/out number of valid entries in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is walked in groups terminated by a 0xFFFFFFFF dword.  The
 * offset of the first dword of each group is appended to
 * @ind_start_offsets.  Inside a group every record is three dwords long;
 * the third dword is looked up in (or appended to) @unique_indices and is
 * then overwritten with its position in that array.
 *
 * BUG()s when either output array fills up.  A record that is cut short by
 * the end of the list is ignored instead of being read past @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* group terminator: the next dword starts a new group */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the third dword of the record, which holds the index */
		ind_offset += 2;

		/*
		 * A truncated trailing record would place the index dword
		 * outside the buffer; stop rather than touch memory beyond
		 * list_size.
		 */
		if (ind_offset >= list_size)
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value with its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3622
3623static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3624{
3625        int i, temp, data;
3626        int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3627        int indices_count = 0;
3628        int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3629        int offset_count = 0;
3630
3631        int list_size;
3632        unsigned int *register_list_format =
3633                kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3634        if (register_list_format == NULL)
3635                return -ENOMEM;
3636        memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3637                        adev->gfx.rlc.reg_list_format_size_bytes);
3638
3639        gfx_v8_0_parse_ind_reg_list(register_list_format,
3640                                RLC_FormatDirectRegListLength,
3641                                adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3642                                unique_indices,
3643                                &indices_count,
3644                                sizeof(unique_indices) / sizeof(int),
3645                                indirect_start_offsets,
3646                                &offset_count,
3647                                sizeof(indirect_start_offsets)/sizeof(int));
3648
3649        /* save and restore list */
3650        temp = RREG32(mmRLC_SRM_CNTL);
3651        temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3652        WREG32(mmRLC_SRM_CNTL, temp);
3653
3654        WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3655        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3656                WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3657
3658        /* indirect list */
3659        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3660        for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3661                WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3662
3663        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3664        list_size = list_size >> 1;
3665        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3666        WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3667
3668        /* starting offsets starts */
3669        WREG32(mmRLC_GPM_SCRATCH_ADDR,
3670                adev->gfx.rlc.starting_offsets_start);
3671        for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3672                WREG32(mmRLC_GPM_SCRATCH_DATA,
3673                                indirect_start_offsets[i]);
3674
3675        /* unique indices */
3676        temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3677        data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3678        for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3679                amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3680                amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3681        }
3682        kfree(register_list_format);
3683
3684        return 0;
3685}
3686
3687static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3688{
3689        uint32_t data;
3690
3691        data = RREG32(mmRLC_SRM_CNTL);
3692        data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3693        WREG32(mmRLC_SRM_CNTL, data);
3694}
3695
3696static void polaris11_init_power_gating(struct amdgpu_device *adev)
3697{
3698        uint32_t data;
3699
3700        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3701                        AMD_PG_SUPPORT_GFX_SMG |
3702                        AMD_PG_SUPPORT_GFX_DMG)) {
3703                data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3704                data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3705                data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3706                WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3707
3708                data = 0;
3709                data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3710                data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3711                data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3712                data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3713                WREG32(mmRLC_PG_DELAY, data);
3714
3715                data = RREG32(mmRLC_PG_DELAY_2);
3716                data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3717                data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3718                WREG32(mmRLC_PG_DELAY_2, data);
3719
3720                data = RREG32(mmRLC_AUTO_PG_CTRL);
3721                data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3722                data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3723                WREG32(mmRLC_AUTO_PG_CTRL, data);
3724        }
3725}
3726
3727static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3728{
3729        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3730                              AMD_PG_SUPPORT_GFX_SMG |
3731                              AMD_PG_SUPPORT_GFX_DMG |
3732                              AMD_PG_SUPPORT_CP |
3733                              AMD_PG_SUPPORT_GDS |
3734                              AMD_PG_SUPPORT_RLC_SMU_HS)) {
3735                gfx_v8_0_init_csb(adev);
3736                gfx_v8_0_init_save_restore_list(adev);
3737                gfx_v8_0_enable_save_restore_machine(adev);
3738
3739                if (adev->asic_type == CHIP_POLARIS11)
3740                        polaris11_init_power_gating(adev);
3741        }
3742}
3743
3744void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3745{
3746        u32 tmp = RREG32(mmRLC_CNTL);
3747
3748        tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3749        WREG32(mmRLC_CNTL, tmp);
3750
3751        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3752
3753        gfx_v8_0_wait_for_rlc_serdes(adev);
3754}
3755
3756static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3757{
3758        u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3759
3760        tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3761        WREG32(mmGRBM_SOFT_RESET, tmp);
3762        udelay(50);
3763        tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3764        WREG32(mmGRBM_SOFT_RESET, tmp);
3765        udelay(50);
3766}
3767
3768static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3769{
3770        u32 tmp = RREG32(mmRLC_CNTL);
3771
3772        tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3773        WREG32(mmRLC_CNTL, tmp);
3774
3775        /* carrizo do enable cp interrupt after cp inited */
3776        if (!(adev->flags & AMD_IS_APU))
3777                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3778
3779        udelay(50);
3780}
3781
3782static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3783{
3784        const struct rlc_firmware_header_v2_0 *hdr;
3785        const __le32 *fw_data;
3786        unsigned i, fw_size;
3787
3788        if (!adev->gfx.rlc_fw)
3789                return -EINVAL;
3790
3791        hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3792        amdgpu_ucode_print_rlc_hdr(&hdr->header);
3793
3794        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3795                           le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3796        fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3797
3798        WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3799        for (i = 0; i < fw_size; i++)
3800                WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3801        WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3802
3803        return 0;
3804}
3805
3806static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3807{
3808        int r;
3809
3810        gfx_v8_0_rlc_stop(adev);
3811
3812        /* disable CG */
3813        WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3814        if (adev->asic_type == CHIP_POLARIS11 ||
3815                adev->asic_type == CHIP_POLARIS10)
3816                WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3817
3818        /* disable PG */
3819        WREG32(mmRLC_PG_CNTL, 0);
3820
3821        gfx_v8_0_rlc_reset(adev);
3822
3823        gfx_v8_0_init_pg(adev);
3824
3825        if (!adev->pp_enabled) {
3826                if (!adev->firmware.smu_load) {
3827                        /* legacy rlc firmware loading */
3828                        r = gfx_v8_0_rlc_load_microcode(adev);
3829                        if (r)
3830                                return r;
3831                } else {
3832                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3833                                                        AMDGPU_UCODE_ID_RLC_G);
3834                        if (r)
3835                                return -EINVAL;
3836                }
3837        }
3838
3839        gfx_v8_0_rlc_start(adev);
3840
3841        return 0;
3842}
3843
3844static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3845{
3846        int i;
3847        u32 tmp = RREG32(mmCP_ME_CNTL);
3848
3849        if (enable) {
3850                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3851                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3852                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3853        } else {
3854                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3855                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3856                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3857                for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3858                        adev->gfx.gfx_ring[i].ready = false;
3859        }
3860        WREG32(mmCP_ME_CNTL, tmp);
3861        udelay(50);
3862}
3863
3864static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3865{
3866        const struct gfx_firmware_header_v1_0 *pfp_hdr;
3867        const struct gfx_firmware_header_v1_0 *ce_hdr;
3868        const struct gfx_firmware_header_v1_0 *me_hdr;
3869        const __le32 *fw_data;
3870        unsigned i, fw_size;
3871
3872        if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3873                return -EINVAL;
3874
3875        pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3876                adev->gfx.pfp_fw->data;
3877        ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3878                adev->gfx.ce_fw->data;
3879        me_hdr = (const struct gfx_firmware_header_v1_0 *)
3880                adev->gfx.me_fw->data;
3881
3882        amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3883        amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3884        amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3885
3886        gfx_v8_0_cp_gfx_enable(adev, false);
3887
3888        /* PFP */
3889        fw_data = (const __le32 *)
3890                (adev->gfx.pfp_fw->data +
3891                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3892        fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3893        WREG32(mmCP_PFP_UCODE_ADDR, 0);
3894        for (i = 0; i < fw_size; i++)
3895                WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3896        WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3897
3898        /* CE */
3899        fw_data = (const __le32 *)
3900                (adev->gfx.ce_fw->data +
3901                 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3902        fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3903        WREG32(mmCP_CE_UCODE_ADDR, 0);
3904        for (i = 0; i < fw_size; i++)
3905                WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3906        WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3907
3908        /* ME */
3909        fw_data = (const __le32 *)
3910                (adev->gfx.me_fw->data +
3911                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3912        fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3913        WREG32(mmCP_ME_RAM_WADDR, 0);
3914        for (i = 0; i < fw_size; i++)
3915                WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3916        WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3917
3918        return 0;
3919}
3920
3921static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3922{
3923        u32 count = 0;
3924        const struct cs_section_def *sect = NULL;
3925        const struct cs_extent_def *ext = NULL;
3926
3927        /* begin clear state */
3928        count += 2;
3929        /* context control state */
3930        count += 3;
3931
3932        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3933                for (ext = sect->section; ext->extent != NULL; ++ext) {
3934                        if (sect->id == SECT_CONTEXT)
3935                                count += 2 + ext->reg_count;
3936                        else
3937                                return 0;
3938                }
3939        }
3940        /* pa_sc_raster_config/pa_sc_raster_config1 */
3941        count += 4;
3942        /* end clear state */
3943        count += 2;
3944        /* clear state */
3945        count += 2;
3946
3947        return count;
3948}
3949
3950static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3951{
3952        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3953        const struct cs_section_def *sect = NULL;
3954        const struct cs_extent_def *ext = NULL;
3955        int r, i;
3956
3957        /* init the CP */
3958        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3959        WREG32(mmCP_ENDIAN_SWAP, 0);
3960        WREG32(mmCP_DEVICE_ID, 1);
3961
3962        gfx_v8_0_cp_gfx_enable(adev, true);
3963
3964        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3965        if (r) {
3966                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3967                return r;
3968        }
3969
3970        /* clear state buffer */
3971        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3972        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3973
3974        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3975        amdgpu_ring_write(ring, 0x80000000);
3976        amdgpu_ring_write(ring, 0x80000000);
3977
3978        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3979                for (ext = sect->section; ext->extent != NULL; ++ext) {
3980                        if (sect->id == SECT_CONTEXT) {
3981                                amdgpu_ring_write(ring,
3982                                       PACKET3(PACKET3_SET_CONTEXT_REG,
3983                                               ext->reg_count));
3984                                amdgpu_ring_write(ring,
3985                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3986                                for (i = 0; i < ext->reg_count; i++)
3987                                        amdgpu_ring_write(ring, ext->extent[i]);
3988                        }
3989                }
3990        }
3991
3992        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3993        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3994        switch (adev->asic_type) {
3995        case CHIP_TONGA:
3996        case CHIP_POLARIS10:
3997                amdgpu_ring_write(ring, 0x16000012);
3998                amdgpu_ring_write(ring, 0x0000002A);
3999                break;
4000        case CHIP_POLARIS11:
4001                amdgpu_ring_write(ring, 0x16000012);
4002                amdgpu_ring_write(ring, 0x00000000);
4003                break;
4004        case CHIP_FIJI:
4005                amdgpu_ring_write(ring, 0x3a00161a);
4006                amdgpu_ring_write(ring, 0x0000002e);
4007                break;
4008        case CHIP_CARRIZO:
4009                amdgpu_ring_write(ring, 0x00000002);
4010                amdgpu_ring_write(ring, 0x00000000);
4011                break;
4012        case CHIP_TOPAZ:
4013                amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4014                                0x00000000 : 0x00000002);
4015                amdgpu_ring_write(ring, 0x00000000);
4016                break;
4017        case CHIP_STONEY:
4018                amdgpu_ring_write(ring, 0x00000000);
4019                amdgpu_ring_write(ring, 0x00000000);
4020                break;
4021        default:
4022                BUG();
4023        }
4024
4025        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4026        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4027
4028        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4029        amdgpu_ring_write(ring, 0);
4030
4031        /* init the CE partitions */
4032        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4033        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4034        amdgpu_ring_write(ring, 0x8000);
4035        amdgpu_ring_write(ring, 0x8000);
4036
4037        amdgpu_ring_commit(ring);
4038
4039        return 0;
4040}
4041
4042static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4043{
4044        struct amdgpu_ring *ring;
4045        u32 tmp;
4046        u32 rb_bufsz;
4047        u64 rb_addr, rptr_addr;
4048        int r;
4049
4050        /* Set the write pointer delay */
4051        WREG32(mmCP_RB_WPTR_DELAY, 0);
4052
4053        /* set the RB to use vmid 0 */
4054        WREG32(mmCP_RB_VMID, 0);
4055
4056        /* Set ring buffer size */
4057        ring = &adev->gfx.gfx_ring[0];
4058        rb_bufsz = order_base_2(ring->ring_size / 8);
4059        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4060        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4061        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4062        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4063#ifdef __BIG_ENDIAN
4064        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4065#endif
4066        WREG32(mmCP_RB0_CNTL, tmp);
4067
4068        /* Initialize the ring buffer's read and write pointers */
4069        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4070        ring->wptr = 0;
4071        WREG32(mmCP_RB0_WPTR, ring->wptr);
4072
4073        /* set the wb address wether it's enabled or not */
4074        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4075        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4076        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4077
4078        mdelay(1);
4079        WREG32(mmCP_RB0_CNTL, tmp);
4080
4081        rb_addr = ring->gpu_addr >> 8;
4082        WREG32(mmCP_RB0_BASE, rb_addr);
4083        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4084
4085        /* no gfx doorbells on iceland */
4086        if (adev->asic_type != CHIP_TOPAZ) {
4087                tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4088                if (ring->use_doorbell) {
4089                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4090                                            DOORBELL_OFFSET, ring->doorbell_index);
4091                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4092                                            DOORBELL_HIT, 0);
4093                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4094                                            DOORBELL_EN, 1);
4095                } else {
4096                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4097                                            DOORBELL_EN, 0);
4098                }
4099                WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4100
4101                if (adev->asic_type == CHIP_TONGA) {
4102                        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4103                                            DOORBELL_RANGE_LOWER,
4104                                            AMDGPU_DOORBELL_GFX_RING0);
4105                        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4106
4107                        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4108                               CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4109                }
4110
4111        }
4112
4113        /* start the ring */
4114        gfx_v8_0_cp_gfx_start(adev);
4115        ring->ready = true;
4116        r = amdgpu_ring_test_ring(ring);
4117        if (r) {
4118                ring->ready = false;
4119                return r;
4120        }
4121
4122        return 0;
4123}
4124
4125static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4126{
4127        int i;
4128
4129        if (enable) {
4130                WREG32(mmCP_MEC_CNTL, 0);
4131        } else {
4132                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4133                for (i = 0; i < adev->gfx.num_compute_rings; i++)
4134                        adev->gfx.compute_ring[i].ready = false;
4135        }
4136        udelay(50);
4137}
4138
4139static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4140{
4141        const struct gfx_firmware_header_v1_0 *mec_hdr;
4142        const __le32 *fw_data;
4143        unsigned i, fw_size;
4144
4145        if (!adev->gfx.mec_fw)
4146                return -EINVAL;
4147
4148        gfx_v8_0_cp_compute_enable(adev, false);
4149
4150        mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4151        amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4152
4153        fw_data = (const __le32 *)
4154                (adev->gfx.mec_fw->data +
4155                 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4156        fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4157
4158        /* MEC1 */
4159        WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4160        for (i = 0; i < fw_size; i++)
4161                WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4162        WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4163
4164        /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4165        if (adev->gfx.mec2_fw) {
4166                const struct gfx_firmware_header_v1_0 *mec2_hdr;
4167
4168                mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4169                amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4170
4171                fw_data = (const __le32 *)
4172                        (adev->gfx.mec2_fw->data +
4173                         le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4174                fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4175
4176                WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4177                for (i = 0; i < fw_size; i++)
4178                        WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4179                WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4180        }
4181
4182        return 0;
4183}
4184
4185struct vi_mqd {
4186        uint32_t header;  /* ordinal0 */
4187        uint32_t compute_dispatch_initiator;  /* ordinal1 */
4188        uint32_t compute_dim_x;  /* ordinal2 */
4189        uint32_t compute_dim_y;  /* ordinal3 */
4190        uint32_t compute_dim_z;  /* ordinal4 */
4191        uint32_t compute_start_x;  /* ordinal5 */
4192        uint32_t compute_start_y;  /* ordinal6 */
4193        uint32_t compute_start_z;  /* ordinal7 */
4194        uint32_t compute_num_thread_x;  /* ordinal8 */
4195        uint32_t compute_num_thread_y;  /* ordinal9 */
4196        uint32_t compute_num_thread_z;  /* ordinal10 */
4197        uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4198        uint32_t compute_perfcount_enable;  /* ordinal12 */
4199        uint32_t compute_pgm_lo;  /* ordinal13 */
4200        uint32_t compute_pgm_hi;  /* ordinal14 */
4201        uint32_t compute_tba_lo;  /* ordinal15 */
4202        uint32_t compute_tba_hi;  /* ordinal16 */
4203        uint32_t compute_tma_lo;  /* ordinal17 */
4204        uint32_t compute_tma_hi;  /* ordinal18 */
4205        uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4206        uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4207        uint32_t compute_vmid;  /* ordinal21 */
4208        uint32_t compute_resource_limits;  /* ordinal22 */
4209        uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4210        uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4211        uint32_t compute_tmpring_size;  /* ordinal25 */
4212        uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4213        uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4214        uint32_t compute_restart_x;  /* ordinal28 */
4215        uint32_t compute_restart_y;  /* ordinal29 */
4216        uint32_t compute_restart_z;  /* ordinal30 */
4217        uint32_t compute_thread_trace_enable;  /* ordinal31 */
4218        uint32_t compute_misc_reserved;  /* ordinal32 */
4219        uint32_t compute_dispatch_id;  /* ordinal33 */
4220        uint32_t compute_threadgroup_id;  /* ordinal34 */
4221        uint32_t compute_relaunch;  /* ordinal35 */
4222        uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4223        uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4224        uint32_t compute_wave_restore_control;  /* ordinal38 */
4225        uint32_t reserved9;  /* ordinal39 */
4226        uint32_t reserved10;  /* ordinal40 */
4227        uint32_t reserved11;  /* ordinal41 */
4228        uint32_t reserved12;  /* ordinal42 */
4229        uint32_t reserved13;  /* ordinal43 */
4230        uint32_t reserved14;  /* ordinal44 */
4231        uint32_t reserved15;  /* ordinal45 */
4232        uint32_t reserved16;  /* ordinal46 */
4233        uint32_t reserved17;  /* ordinal47 */
4234        uint32_t reserved18;  /* ordinal48 */
4235        uint32_t reserved19;  /* ordinal49 */
4236        uint32_t reserved20;  /* ordinal50 */
4237        uint32_t reserved21;  /* ordinal51 */
4238        uint32_t reserved22;  /* ordinal52 */
4239        uint32_t reserved23;  /* ordinal53 */
4240        uint32_t reserved24;  /* ordinal54 */
4241        uint32_t reserved25;  /* ordinal55 */
4242        uint32_t reserved26;  /* ordinal56 */
4243        uint32_t reserved27;  /* ordinal57 */
4244        uint32_t reserved28;  /* ordinal58 */
4245        uint32_t reserved29;  /* ordinal59 */
4246        uint32_t reserved30;  /* ordinal60 */
4247        uint32_t reserved31;  /* ordinal61 */
4248        uint32_t reserved32;  /* ordinal62 */
4249        uint32_t reserved33;  /* ordinal63 */
4250        uint32_t reserved34;  /* ordinal64 */
4251        uint32_t compute_user_data_0;  /* ordinal65 */
4252        uint32_t compute_user_data_1;  /* ordinal66 */
4253        uint32_t compute_user_data_2;  /* ordinal67 */
4254        uint32_t compute_user_data_3;  /* ordinal68 */
4255        uint32_t compute_user_data_4;  /* ordinal69 */
4256        uint32_t compute_user_data_5;  /* ordinal70 */
4257        uint32_t compute_user_data_6;  /* ordinal71 */
4258        uint32_t compute_user_data_7;  /* ordinal72 */
4259        uint32_t compute_user_data_8;  /* ordinal73 */
4260        uint32_t compute_user_data_9;  /* ordinal74 */
4261        uint32_t compute_user_data_10;  /* ordinal75 */
4262        uint32_t compute_user_data_11;  /* ordinal76 */
4263        uint32_t compute_user_data_12;  /* ordinal77 */
4264        uint32_t compute_user_data_13;  /* ordinal78 */
4265        uint32_t compute_user_data_14;  /* ordinal79 */
4266        uint32_t compute_user_data_15;  /* ordinal80 */
4267        uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4268        uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4269        uint32_t reserved35;  /* ordinal83 */
4270        uint32_t reserved36;  /* ordinal84 */
4271        uint32_t reserved37;  /* ordinal85 */
4272        uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4273        uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4274        uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4275        uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4276        uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4277        uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4278        uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4279        uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4280        uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4281        uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4282        uint32_t reserved38;  /* ordinal96 */
4283        uint32_t reserved39;  /* ordinal97 */
4284        uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4285        uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4286        uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4287        uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4288        uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4289        uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4290        uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4291        uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4292        uint32_t reserved40;  /* ordinal106 */
4293        uint32_t reserved41;  /* ordinal107 */
4294        uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4295        uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4296        uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4297        uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4298        uint32_t reserved42;  /* ordinal112 */
4299        uint32_t reserved43;  /* ordinal113 */
4300        uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4301        uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4302        uint32_t cp_packet_id_lo;  /* ordinal116 */
4303        uint32_t cp_packet_id_hi;  /* ordinal117 */
4304        uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4305        uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4306        uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4307        uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4308        uint32_t gds_save_mask_lo;  /* ordinal122 */
4309        uint32_t gds_save_mask_hi;  /* ordinal123 */
4310        uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4311        uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4312        uint32_t reserved44;  /* ordinal126 */
4313        uint32_t reserved45;  /* ordinal127 */
4314        uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4315        uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4316        uint32_t cp_hqd_active;  /* ordinal130 */
4317        uint32_t cp_hqd_vmid;  /* ordinal131 */
4318        uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4319        uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4320        uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4321        uint32_t cp_hqd_quantum;  /* ordinal135 */
4322        uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4323        uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4324        uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4325        uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4326        uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4327        uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4328        uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4329        uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4330        uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4331        uint32_t cp_hqd_pq_control;  /* ordinal145 */
4332        uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4333        uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4334        uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4335        uint32_t cp_hqd_ib_control;  /* ordinal149 */
4336        uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4337        uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4338        uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4339        uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4340        uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4341        uint32_t cp_hqd_msg_type;  /* ordinal155 */
4342        uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4343        uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4344        uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4345        uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4346        uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4347        uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4348        uint32_t cp_mqd_control;  /* ordinal162 */
4349        uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4350        uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4351        uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4352        uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4353        uint32_t cp_hqd_eop_control;  /* ordinal167 */
4354        uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4355        uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4356        uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4357        uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4358        uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4359        uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4360        uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4361        uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4362        uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4363        uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4364        uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4365        uint32_t cp_hqd_error;  /* ordinal179 */
4366        uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4367        uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4368        uint32_t reserved46;  /* ordinal182 */
4369        uint32_t reserved47;  /* ordinal183 */
4370        uint32_t reserved48;  /* ordinal184 */
4371        uint32_t reserved49;  /* ordinal185 */
4372        uint32_t reserved50;  /* ordinal186 */
4373        uint32_t reserved51;  /* ordinal187 */
4374        uint32_t reserved52;  /* ordinal188 */
4375        uint32_t reserved53;  /* ordinal189 */
4376        uint32_t reserved54;  /* ordinal190 */
4377        uint32_t reserved55;  /* ordinal191 */
4378        uint32_t iqtimer_pkt_header;  /* ordinal192 */
4379        uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4380        uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4381        uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4382        uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4383        uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4384        uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4385        uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4386        uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4387        uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4388        uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4389        uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4390        uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4391        uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4392        uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4393        uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4394        uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4395        uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4396        uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4397        uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4398        uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4399        uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4400        uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4401        uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4402        uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4403        uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4404        uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4405        uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4406        uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4407        uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4408        uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4409        uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4410        uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4411        uint32_t reserved56;  /* ordinal225 */
4412        uint32_t reserved57;  /* ordinal226 */
4413        uint32_t reserved58;  /* ordinal227 */
4414        uint32_t set_resources_header;  /* ordinal228 */
4415        uint32_t set_resources_dw1;  /* ordinal229 */
4416        uint32_t set_resources_dw2;  /* ordinal230 */
4417        uint32_t set_resources_dw3;  /* ordinal231 */
4418        uint32_t set_resources_dw4;  /* ordinal232 */
4419        uint32_t set_resources_dw5;  /* ordinal233 */
4420        uint32_t set_resources_dw6;  /* ordinal234 */
4421        uint32_t set_resources_dw7;  /* ordinal235 */
4422        uint32_t reserved59;  /* ordinal236 */
4423        uint32_t reserved60;  /* ordinal237 */
4424        uint32_t reserved61;  /* ordinal238 */
4425        uint32_t reserved62;  /* ordinal239 */
4426        uint32_t reserved63;  /* ordinal240 */
4427        uint32_t reserved64;  /* ordinal241 */
4428        uint32_t reserved65;  /* ordinal242 */
4429        uint32_t reserved66;  /* ordinal243 */
4430        uint32_t reserved67;  /* ordinal244 */
4431        uint32_t reserved68;  /* ordinal245 */
4432        uint32_t reserved69;  /* ordinal246 */
4433        uint32_t reserved70;  /* ordinal247 */
4434        uint32_t reserved71;  /* ordinal248 */
4435        uint32_t reserved72;  /* ordinal249 */
4436        uint32_t reserved73;  /* ordinal250 */
4437        uint32_t reserved74;  /* ordinal251 */
4438        uint32_t reserved75;  /* ordinal252 */
4439        uint32_t reserved76;  /* ordinal253 */
4440        uint32_t reserved77;  /* ordinal254 */
4441        uint32_t reserved78;  /* ordinal255 */
4442
4443        uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4444};
4445
4446static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4447{
4448        int i, r;
4449
4450        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4451                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4452
4453                if (ring->mqd_obj) {
4454                        r = amdgpu_bo_reserve(ring->mqd_obj, false);
4455                        if (unlikely(r != 0))
4456                                dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4457
4458                        amdgpu_bo_unpin(ring->mqd_obj);
4459                        amdgpu_bo_unreserve(ring->mqd_obj);
4460
4461                        amdgpu_bo_unref(&ring->mqd_obj);
4462                        ring->mqd_obj = NULL;
4463                }
4464        }
4465}
4466
4467static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4468{
4469        int r, i, j;
4470        u32 tmp;
4471        bool use_doorbell = true;
4472        u64 hqd_gpu_addr;
4473        u64 mqd_gpu_addr;
4474        u64 eop_gpu_addr;
4475        u64 wb_gpu_addr;
4476        u32 *buf;
4477        struct vi_mqd *mqd;
4478
4479        /* init the pipes */
4480        mutex_lock(&adev->srbm_mutex);
4481        for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4482                int me = (i < 4) ? 1 : 2;
4483                int pipe = (i < 4) ? i : (i - 4);
4484
4485                eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4486                eop_gpu_addr >>= 8;
4487
4488                vi_srbm_select(adev, me, pipe, 0, 0);
4489
4490                /* write the EOP addr */
4491                WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4492                WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4493
4494                /* set the VMID assigned */
4495                WREG32(mmCP_HQD_VMID, 0);
4496
4497                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4498                tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4499                tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4500                                    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4501                WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4502        }
4503        vi_srbm_select(adev, 0, 0, 0, 0);
4504        mutex_unlock(&adev->srbm_mutex);
4505
4506        /* init the queues.  Just two for now. */
4507        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4508                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4509
4510                if (ring->mqd_obj == NULL) {
4511                        r = amdgpu_bo_create(adev,
4512                                             sizeof(struct vi_mqd),
4513                                             PAGE_SIZE, true,
4514                                             AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4515                                             NULL, &ring->mqd_obj);
4516                        if (r) {
4517                                dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4518                                return r;
4519                        }
4520                }
4521
4522                r = amdgpu_bo_reserve(ring->mqd_obj, false);
4523                if (unlikely(r != 0)) {
4524                        gfx_v8_0_cp_compute_fini(adev);
4525                        return r;
4526                }
4527                r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4528                                  &mqd_gpu_addr);
4529                if (r) {
4530                        dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4531                        gfx_v8_0_cp_compute_fini(adev);
4532                        return r;
4533                }
4534                r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4535                if (r) {
4536                        dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4537                        gfx_v8_0_cp_compute_fini(adev);
4538                        return r;
4539                }
4540
4541                /* init the mqd struct */
4542                memset(buf, 0, sizeof(struct vi_mqd));
4543
4544                mqd = (struct vi_mqd *)buf;
4545                mqd->header = 0xC0310800;
4546                mqd->compute_pipelinestat_enable = 0x00000001;
4547                mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4548                mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4549                mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4550                mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4551                mqd->compute_misc_reserved = 0x00000003;
4552
4553                mutex_lock(&adev->srbm_mutex);
4554                vi_srbm_select(adev, ring->me,
4555                               ring->pipe,
4556                               ring->queue, 0);
4557
4558                /* disable wptr polling */
4559                tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4560                tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4561                WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4562
4563                mqd->cp_hqd_eop_base_addr_lo =
4564                        RREG32(mmCP_HQD_EOP_BASE_ADDR);
4565                mqd->cp_hqd_eop_base_addr_hi =
4566                        RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4567
4568                /* enable doorbell? */
4569                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4570                if (use_doorbell) {
4571                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4572                } else {
4573                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4574                }
4575                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4576                mqd->cp_hqd_pq_doorbell_control = tmp;
4577
4578                /* disable the queue if it's active */
4579                mqd->cp_hqd_dequeue_request = 0;
4580                mqd->cp_hqd_pq_rptr = 0;
4581                mqd->cp_hqd_pq_wptr= 0;
4582                if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4583                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4584                        for (j = 0; j < adev->usec_timeout; j++) {
4585                                if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4586                                        break;
4587                                udelay(1);
4588                        }
4589                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4590                        WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4591                        WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4592                }
4593
4594                /* set the pointer to the MQD */
4595                mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4596                mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4597                WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4598                WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4599
4600                /* set MQD vmid to 0 */
4601                tmp = RREG32(mmCP_MQD_CONTROL);
4602                tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4603                WREG32(mmCP_MQD_CONTROL, tmp);
4604                mqd->cp_mqd_control = tmp;
4605
4606                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4607                hqd_gpu_addr = ring->gpu_addr >> 8;
4608                mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4609                mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4610                WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4611                WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4612
4613                /* set up the HQD, this is similar to CP_RB0_CNTL */
4614                tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4615                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4616                                    (order_base_2(ring->ring_size / 4) - 1));
4617                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4618                               ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4619#ifdef __BIG_ENDIAN
4620                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4621#endif
4622                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4623                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4624                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4625                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4626                WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4627                mqd->cp_hqd_pq_control = tmp;
4628
4629                /* set the wb address wether it's enabled or not */
4630                wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4631                mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4632                mqd->cp_hqd_pq_rptr_report_addr_hi =
4633                        upper_32_bits(wb_gpu_addr) & 0xffff;
4634                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4635                       mqd->cp_hqd_pq_rptr_report_addr_lo);
4636                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4637                       mqd->cp_hqd_pq_rptr_report_addr_hi);
4638
4639                /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4640                wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4641                mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4642                mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4643                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4644                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4645                       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4646
4647                /* enable the doorbell if requested */
4648                if (use_doorbell) {
4649                        if ((adev->asic_type == CHIP_CARRIZO) ||
4650                            (adev->asic_type == CHIP_FIJI) ||
4651                            (adev->asic_type == CHIP_STONEY) ||
4652                            (adev->asic_type == CHIP_POLARIS11) ||
4653                            (adev->asic_type == CHIP_POLARIS10)) {
4654                                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4655                                       AMDGPU_DOORBELL_KIQ << 2);
4656                                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4657                                       AMDGPU_DOORBELL_MEC_RING7 << 2);
4658                        }
4659                        tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4660                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4661                                            DOORBELL_OFFSET, ring->doorbell_index);
4662                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4663                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4664                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4665                        mqd->cp_hqd_pq_doorbell_control = tmp;
4666
4667                } else {
4668                        mqd->cp_hqd_pq_doorbell_control = 0;
4669                }
4670                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4671                       mqd->cp_hqd_pq_doorbell_control);
4672
4673                /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4674                ring->wptr = 0;
4675                mqd->cp_hqd_pq_wptr = ring->wptr;
4676                WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4677                mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4678
4679                /* set the vmid for the queue */
4680                mqd->cp_hqd_vmid = 0;
4681                WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4682
4683                tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4684                tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4685                WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4686                mqd->cp_hqd_persistent_state = tmp;
4687                if (adev->asic_type == CHIP_STONEY ||
4688                        adev->asic_type == CHIP_POLARIS11 ||
4689                        adev->asic_type == CHIP_POLARIS10) {
4690                        tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4691                        tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4692                        WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4693                }
4694
4695                /* activate the queue */
4696                mqd->cp_hqd_active = 1;
4697                WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4698
4699                vi_srbm_select(adev, 0, 0, 0, 0);
4700                mutex_unlock(&adev->srbm_mutex);
4701
4702                amdgpu_bo_kunmap(ring->mqd_obj);
4703                amdgpu_bo_unreserve(ring->mqd_obj);
4704        }
4705
4706        if (use_doorbell) {
4707                tmp = RREG32(mmCP_PQ_STATUS);
4708                tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4709                WREG32(mmCP_PQ_STATUS, tmp);
4710        }
4711
4712        gfx_v8_0_cp_compute_enable(adev, true);
4713
4714        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4715                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4716
4717                ring->ready = true;
4718                r = amdgpu_ring_test_ring(ring);
4719                if (r)
4720                        ring->ready = false;
4721        }
4722
4723        return 0;
4724}
4725
4726static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4727{
4728        int r;
4729
4730        if (!(adev->flags & AMD_IS_APU))
4731                gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4732
4733        if (!adev->pp_enabled) {
4734                if (!adev->firmware.smu_load) {
4735                        /* legacy firmware loading */
4736                        r = gfx_v8_0_cp_gfx_load_microcode(adev);
4737                        if (r)
4738                                return r;
4739
4740                        r = gfx_v8_0_cp_compute_load_microcode(adev);
4741                        if (r)
4742                                return r;
4743                } else {
4744                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4745                                                        AMDGPU_UCODE_ID_CP_CE);
4746                        if (r)
4747                                return -EINVAL;
4748
4749                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4750                                                        AMDGPU_UCODE_ID_CP_PFP);
4751                        if (r)
4752                                return -EINVAL;
4753
4754                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4755                                                        AMDGPU_UCODE_ID_CP_ME);
4756                        if (r)
4757                                return -EINVAL;
4758
4759                        if (adev->asic_type == CHIP_TOPAZ) {
4760                                r = gfx_v8_0_cp_compute_load_microcode(adev);
4761                                if (r)
4762                                        return r;
4763                        } else {
4764                                r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4765                                                                                 AMDGPU_UCODE_ID_CP_MEC1);
4766                                if (r)
4767                                        return -EINVAL;
4768                        }
4769                }
4770        }
4771
4772        r = gfx_v8_0_cp_gfx_resume(adev);
4773        if (r)
4774                return r;
4775
4776        r = gfx_v8_0_cp_compute_resume(adev);
4777        if (r)
4778                return r;
4779
4780        gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4781
4782        return 0;
4783}
4784
4785static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4786{
4787        gfx_v8_0_cp_gfx_enable(adev, enable);
4788        gfx_v8_0_cp_compute_enable(adev, enable);
4789}
4790
/*
 * gfx_v8_0_hw_init - bring up the gfx block hardware
 *
 * @handle: amdgpu_device pointer
 *
 * Applies the golden register settings, runs the GPU init routine, then
 * resumes the RLC followed by the CP.
 * Returns 0 on success or the first non-zero error from the resume steps.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = handle;
        int ret;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        /* the RLC is resumed before the CP; stop at the first failure */
        ret = gfx_v8_0_rlc_resume(adev);
        if (ret)
                return ret;

        return gfx_v8_0_cp_resume(adev);
}
4810
4811static int gfx_v8_0_hw_fini(void *handle)
4812{
4813        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4814
4815        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4816        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4817        gfx_v8_0_cp_enable(adev, false);
4818        gfx_v8_0_rlc_stop(adev);
4819        gfx_v8_0_cp_compute_fini(adev);
4820
4821        amdgpu_set_powergating_state(adev,
4822                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4823
4824        return 0;
4825}
4826
/*
 * gfx_v8_0_suspend - suspend the gfx block
 *
 * @handle: amdgpu_device pointer
 *
 * Suspend is implemented as a plain hardware teardown; the result of
 * gfx_v8_0_hw_fini() is returned unchanged.
 */
static int gfx_v8_0_suspend(void *handle)
{
        return gfx_v8_0_hw_fini(handle);
}
4833
/*
 * gfx_v8_0_resume - resume the gfx block
 *
 * @handle: amdgpu_device pointer
 *
 * Resume is implemented as a full hardware init; the result of
 * gfx_v8_0_hw_init() is returned unchanged.
 */
static int gfx_v8_0_resume(void *handle)
{
        return gfx_v8_0_hw_init(handle);
}
4840
4841static bool gfx_v8_0_is_idle(void *handle)
4842{
4843        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4844
4845        if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4846                return false;
4847        else
4848                return true;
4849}
4850
4851static int gfx_v8_0_wait_for_idle(void *handle)
4852{
4853        unsigned i;
4854        u32 tmp;
4855        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4856
4857        for (i = 0; i < adev->usec_timeout; i++) {
4858                /* read MC_STATUS */
4859                tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4860
4861                if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4862                        return 0;
4863                udelay(1);
4864        }
4865        return -ETIMEDOUT;
4866}
4867
4868static int gfx_v8_0_soft_reset(void *handle)
4869{
4870        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4871        u32 tmp;
4872        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4873
4874        /* GRBM_STATUS */
4875        tmp = RREG32(mmGRBM_STATUS);
4876        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4877                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4878                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4879                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4880                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4881                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4882                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4883                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4884                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4885                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4886        }
4887
4888        if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4889                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4890                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4891                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4892                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4893        }
4894
4895        /* GRBM_STATUS2 */
4896        tmp = RREG32(mmGRBM_STATUS2);
4897        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4898                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4899                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4900
4901        /* SRBM_STATUS */
4902        tmp = RREG32(mmSRBM_STATUS);
4903        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4904                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4905                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4906
4907        if (grbm_soft_reset || srbm_soft_reset) {
4908                /* stop the rlc */
4909                gfx_v8_0_rlc_stop(adev);
4910
4911                /* Disable GFX parsing/prefetching */
4912                gfx_v8_0_cp_gfx_enable(adev, false);
4913
4914                /* Disable MEC parsing/prefetching */
4915                gfx_v8_0_cp_compute_enable(adev, false);
4916
4917                if (grbm_soft_reset || srbm_soft_reset) {
4918                        tmp = RREG32(mmGMCON_DEBUG);
4919                        tmp = REG_SET_FIELD(tmp,
4920                                            GMCON_DEBUG, GFX_STALL, 1);
4921                        tmp = REG_SET_FIELD(tmp,
4922                                            GMCON_DEBUG, GFX_CLEAR, 1);
4923                        WREG32(mmGMCON_DEBUG, tmp);
4924
4925                        udelay(50);
4926                }
4927
4928                if (grbm_soft_reset) {
4929                        tmp = RREG32(mmGRBM_SOFT_RESET);
4930                        tmp |= grbm_soft_reset;
4931                        dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4932                        WREG32(mmGRBM_SOFT_RESET, tmp);
4933                        tmp = RREG32(mmGRBM_SOFT_RESET);
4934
4935                        udelay(50);
4936
4937                        tmp &= ~grbm_soft_reset;
4938                        WREG32(mmGRBM_SOFT_RESET, tmp);
4939                        tmp = RREG32(mmGRBM_SOFT_RESET);
4940                }
4941
4942                if (srbm_soft_reset) {
4943                        tmp = RREG32(mmSRBM_SOFT_RESET);
4944                        tmp |= srbm_soft_reset;
4945                        dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4946                        WREG32(mmSRBM_SOFT_RESET, tmp);
4947                        tmp = RREG32(mmSRBM_SOFT_RESET);
4948
4949                        udelay(50);
4950
4951                        tmp &= ~srbm_soft_reset;
4952                        WREG32(mmSRBM_SOFT_RESET, tmp);
4953                        tmp = RREG32(mmSRBM_SOFT_RESET);
4954                }
4955
4956                if (grbm_soft_reset || srbm_soft_reset) {
4957                        tmp = RREG32(mmGMCON_DEBUG);
4958                        tmp = REG_SET_FIELD(tmp,
4959                                            GMCON_DEBUG, GFX_STALL, 0);
4960                        tmp = REG_SET_FIELD(tmp,
4961                                            GMCON_DEBUG, GFX_CLEAR, 0);
4962                        WREG32(mmGMCON_DEBUG, tmp);
4963                }
4964
4965                /* Wait a little for things to settle down */
4966                udelay(50);
4967        }
4968        return 0;
4969}
4970
4971/**
4972 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4973 *
4974 * @adev: amdgpu_device pointer
4975 *
4976 * Fetches a GPU clock counter snapshot.
4977 * Returns the 64 bit clock counter snapshot.
4978 */
4979uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4980{
4981        uint64_t clock;
4982
4983        mutex_lock(&adev->gfx.gpu_clock_mutex);
4984        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4985        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4986                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4987        mutex_unlock(&adev->gfx.gpu_clock_mutex);
4988        return clock;
4989}
4990
4991static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4992                                          uint32_t vmid,
4993                                          uint32_t gds_base, uint32_t gds_size,
4994                                          uint32_t gws_base, uint32_t gws_size,
4995                                          uint32_t oa_base, uint32_t oa_size)
4996{
4997        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4998        gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4999
5000        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5001        gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5002
5003        oa_base = oa_base >> AMDGPU_OA_SHIFT;
5004        oa_size = oa_size >> AMDGPU_OA_SHIFT;
5005
5006        /* GDS Base */
5007        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5008        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5009                                WRITE_DATA_DST_SEL(0)));
5010        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5011        amdgpu_ring_write(ring, 0);
5012        amdgpu_ring_write(ring, gds_base);
5013
5014        /* GDS Size */
5015        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5016        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5017                                WRITE_DATA_DST_SEL(0)));
5018        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5019        amdgpu_ring_write(ring, 0);
5020        amdgpu_ring_write(ring, gds_size);
5021
5022        /* GWS */
5023        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5024        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5025                                WRITE_DATA_DST_SEL(0)));
5026        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5027        amdgpu_ring_write(ring, 0);
5028        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5029
5030        /* OA */
5031        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5032        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5033                                WRITE_DATA_DST_SEL(0)));
5034        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5035        amdgpu_ring_write(ring, 0);
5036        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5037}
5038
5039static int gfx_v8_0_early_init(void *handle)
5040{
5041        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042
5043        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5044        adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5045        gfx_v8_0_set_ring_funcs(adev);
5046        gfx_v8_0_set_irq_funcs(adev);
5047        gfx_v8_0_set_gds_init(adev);
5048        gfx_v8_0_set_rlc_funcs(adev);
5049
5050        return 0;
5051}
5052
5053static int gfx_v8_0_late_init(void *handle)
5054{
5055        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5056        int r;
5057
5058        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5059        if (r)
5060                return r;
5061
5062        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5063        if (r)
5064                return r;
5065
5066        /* requires IBs so do in late init after IB pool is initialized */
5067        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5068        if (r)
5069                return r;
5070
5071        amdgpu_set_powergating_state(adev,
5072                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5073
5074        return 0;
5075}
5076
5077static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5078                bool enable)
5079{
5080        uint32_t data, temp;
5081
5082        /* Send msg to SMU via Powerplay */
5083        amdgpu_set_powergating_state(adev,
5084                        AMD_IP_BLOCK_TYPE_SMC,
5085                        enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5086
5087        if (enable) {
5088                /* Enable static MGPG */
5089                temp = data = RREG32(mmRLC_PG_CNTL);
5090                data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5091
5092                if (temp != data)
5093                        WREG32(mmRLC_PG_CNTL, data);
5094        } else {
5095                temp = data = RREG32(mmRLC_PG_CNTL);
5096                data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5097
5098                if (temp != data)
5099                        WREG32(mmRLC_PG_CNTL, data);
5100        }
5101}
5102
5103static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5104                bool enable)
5105{
5106        uint32_t data, temp;
5107
5108        if (enable) {
5109                /* Enable dynamic MGPG */
5110                temp = data = RREG32(mmRLC_PG_CNTL);
5111                data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5112
5113                if (temp != data)
5114                        WREG32(mmRLC_PG_CNTL, data);
5115        } else {
5116                temp = data = RREG32(mmRLC_PG_CNTL);
5117                data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5118
5119                if (temp != data)
5120                        WREG32(mmRLC_PG_CNTL, data);
5121        }
5122}
5123
5124static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5125                bool enable)
5126{
5127        uint32_t data, temp;
5128
5129        if (enable) {
5130                /* Enable quick PG */
5131                temp = data = RREG32(mmRLC_PG_CNTL);
5132                data |= 0x100000;
5133
5134                if (temp != data)
5135                        WREG32(mmRLC_PG_CNTL, data);
5136        } else {
5137                temp = data = RREG32(mmRLC_PG_CNTL);
5138                data &= ~0x100000;
5139
5140                if (temp != data)
5141                        WREG32(mmRLC_PG_CNTL, data);
5142        }
5143}
5144
5145static int gfx_v8_0_set_powergating_state(void *handle,
5146                                          enum amd_powergating_state state)
5147{
5148        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5149
5150        if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5151                return 0;
5152
5153        switch (adev->asic_type) {
5154        case CHIP_POLARIS11:
5155                if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5156                        polaris11_enable_gfx_static_mg_power_gating(adev,
5157                                        state == AMD_PG_STATE_GATE ? true : false);
5158                else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5159                        polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5160                                        state == AMD_PG_STATE_GATE ? true : false);
5161                else
5162                        polaris11_enable_gfx_quick_mg_power_gating(adev,
5163                                        state == AMD_PG_STATE_GATE ? true : false);
5164                break;
5165        default:
5166                break;
5167        }
5168
5169        return 0;
5170}
5171
5172static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5173                                     uint32_t reg_addr, uint32_t cmd)
5174{
5175        uint32_t data;
5176
5177        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5178
5179        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5180        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5181
5182        data = RREG32(mmRLC_SERDES_WR_CTRL);
5183        if (adev->asic_type == CHIP_STONEY)
5184                        data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5185                        RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5186                        RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5187                        RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5188                        RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5189                        RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5190                        RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5191                        RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5192                        RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5193        else
5194                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5195                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5196                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5197                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5198                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5199                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5200                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5201                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5202                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5203                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5204                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5205        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5206                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5207                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5208                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5209
5210        WREG32(mmRLC_SERDES_WR_CTRL, data);
5211}
5212
/* Message codes placed in the MESSAGE field of RLC_GPR_REG2 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0

/*
 * Layout of RLC_GPR_REG2 as used by the cz safe mode helpers:
 * bit 0 is the request flag, bits 1-4 carry the message code.
 */
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5219
5220static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5221{
5222        u32 data = 0;
5223        unsigned i;
5224
5225        data = RREG32(mmRLC_CNTL);
5226        if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5227                return;
5228
5229        if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5230            (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5231                               AMD_PG_SUPPORT_GFX_DMG))) {
5232                data |= RLC_GPR_REG2__REQ_MASK;
5233                data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5234                data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5235                WREG32(mmRLC_GPR_REG2, data);
5236
5237                for (i = 0; i < adev->usec_timeout; i++) {
5238                        if ((RREG32(mmRLC_GPM_STAT) &
5239                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5240                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5241                            (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5242                             RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5243                                break;
5244                        udelay(1);
5245                }
5246
5247                for (i = 0; i < adev->usec_timeout; i++) {
5248                        if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5249                                break;
5250                        udelay(1);
5251                }
5252                adev->gfx.rlc.in_safe_mode = true;
5253        }
5254}
5255
5256static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5257{
5258        u32 data;
5259        unsigned i;
5260
5261        data = RREG32(mmRLC_CNTL);
5262        if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5263                return;
5264
5265        if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5266            (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5267                               AMD_PG_SUPPORT_GFX_DMG))) {
5268                data |= RLC_GPR_REG2__REQ_MASK;
5269                data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5270                data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5271                WREG32(mmRLC_GPR_REG2, data);
5272                adev->gfx.rlc.in_safe_mode = false;
5273        }
5274
5275        for (i = 0; i < adev->usec_timeout; i++) {
5276                if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5277                        break;
5278                udelay(1);
5279        }
5280}
5281
5282static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5283{
5284        u32 data;
5285        unsigned i;
5286
5287        data = RREG32(mmRLC_CNTL);
5288        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5289                return;
5290
5291        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5292                data |= RLC_SAFE_MODE__CMD_MASK;
5293                data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5294                data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5295                WREG32(mmRLC_SAFE_MODE, data);
5296
5297                for (i = 0; i < adev->usec_timeout; i++) {
5298                        if ((RREG32(mmRLC_GPM_STAT) &
5299                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5300                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5301                            (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5302                             RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5303                                break;
5304                        udelay(1);
5305                }
5306
5307                for (i = 0; i < adev->usec_timeout; i++) {
5308                        if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5309                                break;
5310                        udelay(1);
5311                }
5312                adev->gfx.rlc.in_safe_mode = true;
5313        }
5314}
5315
5316static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5317{
5318        u32 data = 0;
5319        unsigned i;
5320
5321        data = RREG32(mmRLC_CNTL);
5322        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5323                return;
5324
5325        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5326                if (adev->gfx.rlc.in_safe_mode) {
5327                        data |= RLC_SAFE_MODE__CMD_MASK;
5328                        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5329                        WREG32(mmRLC_SAFE_MODE, data);
5330                        adev->gfx.rlc.in_safe_mode = false;
5331                }
5332        }
5333
5334        for (i = 0; i < adev->usec_timeout; i++) {
5335                if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5336                        break;
5337                udelay(1);
5338        }
5339}
5340
5341static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5342{
5343        adev->gfx.rlc.in_safe_mode = true;
5344}
5345
5346static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5347{
5348        adev->gfx.rlc.in_safe_mode = false;
5349}
5350
5351static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5352        .enter_safe_mode = cz_enter_rlc_safe_mode,
5353        .exit_safe_mode = cz_exit_rlc_safe_mode
5354};
5355
5356static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5357        .enter_safe_mode = iceland_enter_rlc_safe_mode,
5358        .exit_safe_mode = iceland_exit_rlc_safe_mode
5359};
5360
5361static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5362        .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5363        .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5364};
5365
/*
 * gfx_v8_0_update_medium_grain_clock_gating - program MGCG/MGLS/CGTS state
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable (only honoured when AMD_CG_SUPPORT_GFX_MGCG is
 *          set in adev->cg_flags), false to disable
 *
 * The whole sequence runs between the RLC enter_safe_mode and
 * exit_safe_mode callbacks.  Registers are only written when the new value
 * differs from the one read back.  The numbered comments below give the
 * order of the steps on each path.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                /* memory light sleep is only touched when MGLS is supported */
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
                                /* 1 - RLC memory Light sleep */
                                temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
                                data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                                if (temp != data)
                                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                        }

                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
                                /* 2 - CP memory Light sleep */
                                temp = data = RREG32(mmCP_MEM_SLP_CNTL);
                                data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                                if (temp != data)
                                        WREG32(mmCP_MEM_SLP_CNTL, data);
                        }
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                /* on APUs the GRBM override bit is left untouched */
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* the LS override is only lifted when both MGLS and CGTS_LS are supported */
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5478
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - program CGCG/CGLS state
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable (only honoured when AMD_CG_SUPPORT_GFX_CGCG is
 *          set in adev->cg_flags), false to disable
 *
 * RLC_CGCG_CGLS_CTRL is sampled once up front, before safe mode is entered;
 * the final write on either path is based on that sample and is skipped if
 * the value is unchanged.  The rest of the sequence runs between the RLC
 * enter_safe_mode and exit_safe_mode callbacks.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        /* temp keeps the original value, data accumulates the new one */
        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);

                /* drop the CGCG override bit */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 3 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 4 - write cmd to set CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 5 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls*/
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        /* drop the CGLS override bit as well */
                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* TEST CGCG */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Set CGCG Override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5567static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5568                                            bool enable)
5569{
5570        if (enable) {
5571                /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5572                 * ===  MGCG + MGLS + TS(CG/LS) ===
5573                 */
5574                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5575                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5576        } else {
5577                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5578                 * ===  CGCG + CGLS ===
5579                 */
5580                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5581                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5582        }
5583        return 0;
5584}
5585
5586static int gfx_v8_0_set_clockgating_state(void *handle,
5587                                          enum amd_clockgating_state state)
5588{
5589        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5590
5591        switch (adev->asic_type) {
5592        case CHIP_FIJI:
5593        case CHIP_CARRIZO:
5594        case CHIP_STONEY:
5595                gfx_v8_0_update_gfx_clock_gating(adev,
5596                                                 state == AMD_CG_STATE_GATE ? true : false);
5597                break;
5598        default:
5599                break;
5600        }
5601        return 0;
5602}
5603
5604static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5605{
5606        u32 rptr;
5607
5608        rptr = ring->adev->wb.wb[ring->rptr_offs];
5609
5610        return rptr;
5611}
5612
5613static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5614{
5615        struct amdgpu_device *adev = ring->adev;
5616        u32 wptr;
5617
5618        if (ring->use_doorbell)
5619                /* XXX check if swapping is necessary on BE */
5620                wptr = ring->adev->wb.wb[ring->wptr_offs];
5621        else
5622                wptr = RREG32(mmCP_RB0_WPTR);
5623
5624        return wptr;
5625}
5626
5627static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5628{
5629        struct amdgpu_device *adev = ring->adev;
5630
5631        if (ring->use_doorbell) {
5632                /* XXX check if swapping is necessary on BE */
5633                adev->wb.wb[ring->wptr_offs] = ring->wptr;
5634                WDOORBELL32(ring->doorbell_index, ring->wptr);
5635        } else {
5636                WREG32(mmCP_RB0_WPTR, ring->wptr);
5637                (void)RREG32(mmCP_RB0_WPTR);
5638        }
5639}
5640
5641static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5642{
5643        u32 ref_and_mask, reg_mem_engine;
5644
5645        if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5646                switch (ring->me) {
5647                case 1:
5648                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5649                        break;
5650                case 2:
5651                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5652                        break;
5653                default:
5654                        return;
5655                }
5656                reg_mem_engine = 0;
5657        } else {
5658                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5659                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5660        }
5661
5662        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5663        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5664                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
5665                                 reg_mem_engine));
5666        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5667        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5668        amdgpu_ring_write(ring, ref_and_mask);
5669        amdgpu_ring_write(ring, ref_and_mask);
5670        amdgpu_ring_write(ring, 0x20); /* poll interval */
5671}
5672
5673static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5674{
5675        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5676        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5677                                 WRITE_DATA_DST_SEL(0) |
5678                                 WR_CONFIRM));
5679        amdgpu_ring_write(ring, mmHDP_DEBUG0);
5680        amdgpu_ring_write(ring, 0);
5681        amdgpu_ring_write(ring, 1);
5682
5683}
5684
5685static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5686                                      struct amdgpu_ib *ib,
5687                                      unsigned vm_id, bool ctx_switch)
5688{
5689        u32 header, control = 0;
5690        u32 next_rptr = ring->wptr + 5;
5691
5692        if (ctx_switch)
5693                next_rptr += 2;
5694
5695        next_rptr += 4;
5696        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697        amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5698        amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5699        amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5700        amdgpu_ring_write(ring, next_rptr);
5701
5702        /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5703        if (ctx_switch) {
5704                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5705                amdgpu_ring_write(ring, 0);
5706        }
5707
5708        if (ib->flags & AMDGPU_IB_FLAG_CE)
5709                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5710        else
5711                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5712
5713        control |= ib->length_dw | (vm_id << 24);
5714
5715        amdgpu_ring_write(ring, header);
5716        amdgpu_ring_write(ring,
5717#ifdef __BIG_ENDIAN
5718                          (2 << 0) |
5719#endif
5720                          (ib->gpu_addr & 0xFFFFFFFC));
5721        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5722        amdgpu_ring_write(ring, control);
5723}
5724
5725static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5726                                          struct amdgpu_ib *ib,
5727                                          unsigned vm_id, bool ctx_switch)
5728{
5729        u32 header, control = 0;
5730        u32 next_rptr = ring->wptr + 5;
5731
5732        control |= INDIRECT_BUFFER_VALID;
5733
5734        next_rptr += 4;
5735        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5736        amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5737        amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5738        amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5739        amdgpu_ring_write(ring, next_rptr);
5740
5741        header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5742
5743        control |= ib->length_dw | (vm_id << 24);
5744
5745        amdgpu_ring_write(ring, header);
5746        amdgpu_ring_write(ring,
5747#ifdef __BIG_ENDIAN
5748                                          (2 << 0) |
5749#endif
5750                                          (ib->gpu_addr & 0xFFFFFFFC));
5751        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5752        amdgpu_ring_write(ring, control);
5753}
5754
5755static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5756                                         u64 seq, unsigned flags)
5757{
5758        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5759        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5760
5761        /* EVENT_WRITE_EOP - flush caches, send int */
5762        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5763        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5764                                 EOP_TC_ACTION_EN |
5765                                 EOP_TC_WB_ACTION_EN |
5766                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5767                                 EVENT_INDEX(5)));
5768        amdgpu_ring_write(ring, addr & 0xfffffffc);
5769        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5770                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5771        amdgpu_ring_write(ring, lower_32_bits(seq));
5772        amdgpu_ring_write(ring, upper_32_bits(seq));
5773
5774}
5775
5776static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5777{
5778        int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5779        uint32_t seq = ring->fence_drv.sync_seq;
5780        uint64_t addr = ring->fence_drv.gpu_addr;
5781
5782        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5783        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5784                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
5785                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5786        amdgpu_ring_write(ring, addr & 0xfffffffc);
5787        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5788        amdgpu_ring_write(ring, seq);
5789        amdgpu_ring_write(ring, 0xffffffff);
5790        amdgpu_ring_write(ring, 4); /* poll interval */
5791
5792        if (usepfp) {
5793                /* synce CE with ME to prevent CE fetch CEIB before context switch done */
5794                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5795                amdgpu_ring_write(ring, 0);
5796                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5797                amdgpu_ring_write(ring, 0);
5798        }
5799}
5800
5801static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5802                                        unsigned vm_id, uint64_t pd_addr)
5803{
5804        int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5805
5806        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5807        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5808                                 WRITE_DATA_DST_SEL(0)) |
5809                                 WR_CONFIRM);
5810        if (vm_id < 8) {
5811                amdgpu_ring_write(ring,
5812                                  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5813        } else {
5814                amdgpu_ring_write(ring,
5815                                  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5816        }
5817        amdgpu_ring_write(ring, 0);
5818        amdgpu_ring_write(ring, pd_addr >> 12);
5819
5820        /* bits 0-15 are the VM contexts0-15 */
5821        /* invalidate the cache */
5822        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5823        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5824                                 WRITE_DATA_DST_SEL(0)));
5825        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5826        amdgpu_ring_write(ring, 0);
5827        amdgpu_ring_write(ring, 1 << vm_id);
5828
5829        /* wait for the invalidate to complete */
5830        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5831        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5832                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5833                                 WAIT_REG_MEM_ENGINE(0))); /* me */
5834        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5835        amdgpu_ring_write(ring, 0);
5836        amdgpu_ring_write(ring, 0); /* ref */
5837        amdgpu_ring_write(ring, 0); /* mask */
5838        amdgpu_ring_write(ring, 0x20); /* poll interval */
5839
5840        /* compute doesn't have PFP */
5841        if (usepfp) {
5842                /* sync PFP to ME, otherwise we might get invalid PFP reads */
5843                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5844                amdgpu_ring_write(ring, 0x0);
5845                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5846                amdgpu_ring_write(ring, 0);
5847                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5848                amdgpu_ring_write(ring, 0);
5849        }
5850}
5851
5852static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5853{
5854        return ring->adev->wb.wb[ring->rptr_offs];
5855}
5856
5857static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5858{
5859        return ring->adev->wb.wb[ring->wptr_offs];
5860}
5861
5862static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5863{
5864        struct amdgpu_device *adev = ring->adev;
5865
5866        /* XXX check if swapping is necessary on BE */
5867        adev->wb.wb[ring->wptr_offs] = ring->wptr;
5868        WDOORBELL32(ring->doorbell_index, ring->wptr);
5869}
5870
5871static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5872                                             u64 addr, u64 seq,
5873                                             unsigned flags)
5874{
5875        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5876        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5877
5878        /* RELEASE_MEM - flush caches, send int */
5879        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5880        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5881                                 EOP_TC_ACTION_EN |
5882                                 EOP_TC_WB_ACTION_EN |
5883                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5884                                 EVENT_INDEX(5)));
5885        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5886        amdgpu_ring_write(ring, addr & 0xfffffffc);
5887        amdgpu_ring_write(ring, upper_32_bits(addr));
5888        amdgpu_ring_write(ring, lower_32_bits(seq));
5889        amdgpu_ring_write(ring, upper_32_bits(seq));
5890}
5891
5892static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5893                                                 enum amdgpu_interrupt_state state)
5894{
5895        u32 cp_int_cntl;
5896
5897        switch (state) {
5898        case AMDGPU_IRQ_STATE_DISABLE:
5899                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5900                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5901                                            TIME_STAMP_INT_ENABLE, 0);
5902                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5903                break;
5904        case AMDGPU_IRQ_STATE_ENABLE:
5905                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5906                cp_int_cntl =
5907                        REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5908                                      TIME_STAMP_INT_ENABLE, 1);
5909                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5910                break;
5911        default:
5912                break;
5913        }
5914}
5915
5916static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5917                                                     int me, int pipe,
5918                                                     enum amdgpu_interrupt_state state)
5919{
5920        u32 mec_int_cntl, mec_int_cntl_reg;
5921
5922        /*
5923         * amdgpu controls only pipe 0 of MEC1. That's why this function only
5924         * handles the setting of interrupts for this specific pipe. All other
5925         * pipes' interrupts are set by amdkfd.
5926         */
5927
5928        if (me == 1) {
5929                switch (pipe) {
5930                case 0:
5931                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5932                        break;
5933                default:
5934                        DRM_DEBUG("invalid pipe %d\n", pipe);
5935                        return;
5936                }
5937        } else {
5938                DRM_DEBUG("invalid me %d\n", me);
5939                return;
5940        }
5941
5942        switch (state) {
5943        case AMDGPU_IRQ_STATE_DISABLE:
5944                mec_int_cntl = RREG32(mec_int_cntl_reg);
5945                mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5946                                             TIME_STAMP_INT_ENABLE, 0);
5947                WREG32(mec_int_cntl_reg, mec_int_cntl);
5948                break;
5949        case AMDGPU_IRQ_STATE_ENABLE:
5950                mec_int_cntl = RREG32(mec_int_cntl_reg);
5951                mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5952                                             TIME_STAMP_INT_ENABLE, 1);
5953                WREG32(mec_int_cntl_reg, mec_int_cntl);
5954                break;
5955        default:
5956                break;
5957        }
5958}
5959
5960static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5961                                             struct amdgpu_irq_src *source,
5962                                             unsigned type,
5963                                             enum amdgpu_interrupt_state state)
5964{
5965        u32 cp_int_cntl;
5966
5967        switch (state) {
5968        case AMDGPU_IRQ_STATE_DISABLE:
5969                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5970                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5971                                            PRIV_REG_INT_ENABLE, 0);
5972                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5973                break;
5974        case AMDGPU_IRQ_STATE_ENABLE:
5975                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5976                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5977                                            PRIV_REG_INT_ENABLE, 1);
5978                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5979                break;
5980        default:
5981                break;
5982        }
5983
5984        return 0;
5985}
5986
5987static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5988                                              struct amdgpu_irq_src *source,
5989                                              unsigned type,
5990                                              enum amdgpu_interrupt_state state)
5991{
5992        u32 cp_int_cntl;
5993
5994        switch (state) {
5995        case AMDGPU_IRQ_STATE_DISABLE:
5996                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5997                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5998                                            PRIV_INSTR_INT_ENABLE, 0);
5999                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6000                break;
6001        case AMDGPU_IRQ_STATE_ENABLE:
6002                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6003                cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6004                                            PRIV_INSTR_INT_ENABLE, 1);
6005                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6006                break;
6007        default:
6008                break;
6009        }
6010
6011        return 0;
6012}
6013
6014static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6015                                            struct amdgpu_irq_src *src,
6016                                            unsigned type,
6017                                            enum amdgpu_interrupt_state state)
6018{
6019        switch (type) {
6020        case AMDGPU_CP_IRQ_GFX_EOP:
6021                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6022                break;
6023        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6024                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6025                break;
6026        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6027                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6028                break;
6029        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6030                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6031                break;
6032        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6033                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6034                break;
6035        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6036                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6037                break;
6038        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6039                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6040                break;
6041        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6042                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6043                break;
6044        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6045                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6046                break;
6047        default:
6048                break;
6049        }
6050        return 0;
6051}
6052
6053static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6054                            struct amdgpu_irq_src *source,
6055                            struct amdgpu_iv_entry *entry)
6056{
6057        int i;
6058        u8 me_id, pipe_id, queue_id;
6059        struct amdgpu_ring *ring;
6060
6061        DRM_DEBUG("IH: CP EOP\n");
6062        me_id = (entry->ring_id & 0x0c) >> 2;
6063        pipe_id = (entry->ring_id & 0x03) >> 0;
6064        queue_id = (entry->ring_id & 0x70) >> 4;
6065
6066        switch (me_id) {
6067        case 0:
6068                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6069                break;
6070        case 1:
6071        case 2:
6072                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6073                        ring = &adev->gfx.compute_ring[i];
6074                        /* Per-queue interrupt is supported for MEC starting from VI.
6075                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6076                          */
6077                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6078                                amdgpu_fence_process(ring);
6079                }
6080                break;
6081        }
6082        return 0;
6083}
6084
6085static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6086                                 struct amdgpu_irq_src *source,
6087                                 struct amdgpu_iv_entry *entry)
6088{
6089        DRM_ERROR("Illegal register access in command stream\n");
6090        schedule_work(&adev->reset_work);
6091        return 0;
6092}
6093
6094static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6095                                  struct amdgpu_irq_src *source,
6096                                  struct amdgpu_iv_entry *entry)
6097{
6098        DRM_ERROR("Illegal instruction in command stream\n");
6099        schedule_work(&adev->reset_work);
6100        return 0;
6101}
6102
6103const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6104        .name = "gfx_v8_0",
6105        .early_init = gfx_v8_0_early_init,
6106        .late_init = gfx_v8_0_late_init,
6107        .sw_init = gfx_v8_0_sw_init,
6108        .sw_fini = gfx_v8_0_sw_fini,
6109        .hw_init = gfx_v8_0_hw_init,
6110        .hw_fini = gfx_v8_0_hw_fini,
6111        .suspend = gfx_v8_0_suspend,
6112        .resume = gfx_v8_0_resume,
6113        .is_idle = gfx_v8_0_is_idle,
6114        .wait_for_idle = gfx_v8_0_wait_for_idle,
6115        .soft_reset = gfx_v8_0_soft_reset,
6116        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6117        .set_powergating_state = gfx_v8_0_set_powergating_state,
6118};
6119
6120static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6121        .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6122        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6123        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6124        .parse_cs = NULL,
6125        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6126        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6127        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6128        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6129        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6130        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6131        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6132        .test_ring = gfx_v8_0_ring_test_ring,
6133        .test_ib = gfx_v8_0_ring_test_ib,
6134        .insert_nop = amdgpu_ring_insert_nop,
6135        .pad_ib = amdgpu_ring_generic_pad_ib,
6136};
6137
6138static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6139        .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6140        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6141        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6142        .parse_cs = NULL,
6143        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6144        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6145        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6146        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6147        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6148        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6149        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6150        .test_ring = gfx_v8_0_ring_test_ring,
6151        .test_ib = gfx_v8_0_ring_test_ib,
6152        .insert_nop = amdgpu_ring_insert_nop,
6153        .pad_ib = amdgpu_ring_generic_pad_ib,
6154};
6155
6156static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6157{
6158        int i;
6159
6160        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6161                adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6162
6163        for (i = 0; i < adev->gfx.num_compute_rings; i++)
6164                adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6165}
6166
6167static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6168        .set = gfx_v8_0_set_eop_interrupt_state,
6169        .process = gfx_v8_0_eop_irq,
6170};
6171
6172static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6173        .set = gfx_v8_0_set_priv_reg_fault_state,
6174        .process = gfx_v8_0_priv_reg_irq,
6175};
6176
6177static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6178        .set = gfx_v8_0_set_priv_inst_fault_state,
6179        .process = gfx_v8_0_priv_inst_irq,
6180};
6181
6182static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6183{
6184        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6185        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6186
6187        adev->gfx.priv_reg_irq.num_types = 1;
6188        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6189
6190        adev->gfx.priv_inst_irq.num_types = 1;
6191        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6192}
6193
6194static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6195{
6196        switch (adev->asic_type) {
6197        case CHIP_TOPAZ:
6198        case CHIP_STONEY:
6199                adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6200                break;
6201        case CHIP_CARRIZO:
6202                adev->gfx.rlc.funcs = &cz_rlc_funcs;
6203                break;
6204        default:
6205                adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6206                break;
6207        }
6208}
6209
6210static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6211{
6212        /* init asci gds info */
6213        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6214        adev->gds.gws.total_size = 64;
6215        adev->gds.oa.total_size = 16;
6216
6217        if (adev->gds.mem.total_size == 64 * 1024) {
6218                adev->gds.mem.gfx_partition_size = 4096;
6219                adev->gds.mem.cs_partition_size = 4096;
6220
6221                adev->gds.gws.gfx_partition_size = 4;
6222                adev->gds.gws.cs_partition_size = 4;
6223
6224                adev->gds.oa.gfx_partition_size = 4;
6225                adev->gds.oa.cs_partition_size = 1;
6226        } else {
6227                adev->gds.mem.gfx_partition_size = 1024;
6228                adev->gds.mem.cs_partition_size = 1024;
6229
6230                adev->gds.gws.gfx_partition_size = 16;
6231                adev->gds.gws.cs_partition_size = 16;
6232
6233                adev->gds.oa.gfx_partition_size = 4;
6234                adev->gds.oa.cs_partition_size = 4;
6235        }
6236}
6237
6238static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6239{
6240        u32 data, mask;
6241
6242        data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6243        data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6244
6245        data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6246        data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6247
6248        mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6249
6250        return (~data) & mask;
6251}
6252
6253static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6254{
6255        int i, j, k, counter, active_cu_number = 0;
6256        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6257        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6258
6259        memset(cu_info, 0, sizeof(*cu_info));
6260
6261        mutex_lock(&adev->grbm_idx_mutex);
6262        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6263                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6264                        mask = 1;
6265                        ao_bitmap = 0;
6266                        counter = 0;
6267                        gfx_v8_0_select_se_sh(adev, i, j);
6268                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6269                        cu_info->bitmap[i][j] = bitmap;
6270
6271                        for (k = 0; k < 16; k ++) {
6272                                if (bitmap & mask) {
6273                                        if (counter < 2)
6274                                                ao_bitmap |= mask;
6275                                        counter ++;
6276                                }
6277                                mask <<= 1;
6278                        }
6279                        active_cu_number += counter;
6280                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6281                }
6282        }
6283        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6284        mutex_unlock(&adev->grbm_idx_mutex);
6285
6286        cu_info->number = active_cu_number;
6287        cu_info->ao_cu_mask = ao_cu_mask;
6288}
6289