/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
  23#include <linux/firmware.h>
  24#include <drm/drmP.h>
  25#include "amdgpu.h"
  26#include "amdgpu_gfx.h"
  27#include "vi.h"
  28#include "vi_structs.h"
  29#include "vid.h"
  30#include "amdgpu_ucode.h"
  31#include "amdgpu_atombios.h"
  32#include "atombios_i2c.h"
  33#include "clearstate_vi.h"
  34
  35#include "gmc/gmc_8_2_d.h"
  36#include "gmc/gmc_8_2_sh_mask.h"
  37
  38#include "oss/oss_3_0_d.h"
  39#include "oss/oss_3_0_sh_mask.h"
  40
  41#include "bif/bif_5_0_d.h"
  42#include "bif/bif_5_0_sh_mask.h"
  43#include "gca/gfx_8_0_d.h"
  44#include "gca/gfx_8_0_enum.h"
  45#include "gca/gfx_8_0_sh_mask.h"
  46#include "gca/gfx_8_0_enum.h"
  47
  48#include "dce/dce_10_0_d.h"
  49#include "dce/dce_10_0_sh_mask.h"
  50
  51#include "smu/smu_7_1_3_d.h"
  52
/* Number of GFX (graphics) rings exposed by this IP block. */
#define GFX8_NUM_GFX_RINGS     1
/* Size in bytes of one MEC hardware queue descriptor (HPD) slot. */
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC golden GB_ADDR_CONFIG values (match the *_golden_common_all tables below). */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Helpers for composing GB_TILE_MODE* / GB_MACROTILE_MODE* register values:
 * each macro shifts a field value into its register bit position.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-feature override bits in the RLC_CGTT_MGCG_OVERRIDE register. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set or clear a BPM register via the RLC serdes interface. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

/* NOTE(review): presumably the register-list-format id used by RLC firmware
 * headers — confirm against the rlc firmware header definitions. */
#define RLC_FormatDirectRegListLength        14
  93
/*
 * Firmware images required by each supported VI-family ASIC.
 * Listing them here lets userspace tools (e.g. initramfs generators)
 * discover which files this module may request at runtime.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

/* Stoney has no second MEC, hence no mec2 image. */
MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 147
/*
 * Per-VMID GDS register offsets, indexed by VMID (0..15).
 * Each entry holds the {BASE, SIZE, GWS, OA} register offsets for that VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
 167
/*
 * Golden register tables for Tonga, applied via
 * amdgpu_program_register_sequence() at init time.
 * Format: flat {register offset, write mask, value} triples —
 * do not edit individual values; they come from AMD hardware bring-up.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Tonga raster/address config and SPI CU reservation. {reg, mask, value}. */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Tonga MGCG/CGCG (clock gating) init sequence. {reg, mask, value}.
 * mmGRBM_GFX_INDEX = 0xe0000000 selects broadcast mode so the CGTT/CGTS
 * writes that follow reach all SEs/SHs/instances.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* Per-CU (CU0..CU7) CGTS clock-gating control registers. */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
 278
/*
 * Golden register tables for Polaris11 and Polaris10,
 * applied via amdgpu_program_register_sequence().
 * Format: flat {register offset, write mask, value} triples.
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris11 address config and SPI CU reservation. {reg, mask, value}. */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Polaris10 tuning. {reg, mask, value}. */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris10 raster/address config and SPI CU reservation. {reg, mask, value}. */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
 342
/*
 * Golden register tables for Fiji, applied via
 * amdgpu_program_register_sequence(). {reg, mask, value} triples.
 */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

/* Fiji tuning. {reg, mask, value}. */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/*
 * Fiji MGCG/CGCG (clock gating) init sequence. {reg, mask, value}.
 * Unlike Tonga/Carrizo, no per-CU CGTS programming here.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
 410
/*
 * Golden register tables for Iceland (Topaz), applied via
 * amdgpu_program_register_sequence(). {reg, mask, value} triples.
 */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Iceland raster/address config and SPI CU reservation. {reg, mask, value}. */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Iceland MGCG/CGCG (clock gating) init sequence. {reg, mask, value}.
 * Covers CU0..CU5 only (smaller part than Tonga/Carrizo).
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
 510
/*
 * Golden register tables for Carrizo (cz), applied via
 * amdgpu_program_register_sequence(). {reg, mask, value} triples.
 */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Carrizo raster/address config and SPI CU reservation. {reg, mask, value}. */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Carrizo MGCG/CGCG (clock gating) init sequence. {reg, mask, value}.
 * Covers CU0..CU7; note the final RLC_CGCG_CGLS_CTRL value (0x0020003f)
 * differs from Tonga/Fiji/Iceland (0x0020003c).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
 617
/* Stoney (A11) golden register overrides, consumed by
 * amdgpu_program_register_sequence() as (register, mask, value) triples. */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
 631
/* Stoney common golden settings (raster/addr config and SPI CU resource
 * reservations), consumed as (register, mask, value) triples. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
 643
/* Stoney medium-grain / coarse-grain clockgating init values,
 * consumed as (register, mask, value) triples. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
 652
 653static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 654static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 655static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 656static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 657static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 658static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 659static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 660static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 661
 662static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 663{
 664        switch (adev->asic_type) {
 665        case CHIP_TOPAZ:
 666                amdgpu_program_register_sequence(adev,
 667                                                 iceland_mgcg_cgcg_init,
 668                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
 669                amdgpu_program_register_sequence(adev,
 670                                                 golden_settings_iceland_a11,
 671                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
 672                amdgpu_program_register_sequence(adev,
 673                                                 iceland_golden_common_all,
 674                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
 675                break;
 676        case CHIP_FIJI:
 677                amdgpu_program_register_sequence(adev,
 678                                                 fiji_mgcg_cgcg_init,
 679                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
 680                amdgpu_program_register_sequence(adev,
 681                                                 golden_settings_fiji_a10,
 682                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
 683                amdgpu_program_register_sequence(adev,
 684                                                 fiji_golden_common_all,
 685                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
 686                break;
 687
 688        case CHIP_TONGA:
 689                amdgpu_program_register_sequence(adev,
 690                                                 tonga_mgcg_cgcg_init,
 691                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
 692                amdgpu_program_register_sequence(adev,
 693                                                 golden_settings_tonga_a11,
 694                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
 695                amdgpu_program_register_sequence(adev,
 696                                                 tonga_golden_common_all,
 697                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
 698                break;
 699        case CHIP_POLARIS11:
 700        case CHIP_POLARIS12:
 701                amdgpu_program_register_sequence(adev,
 702                                                 golden_settings_polaris11_a11,
 703                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
 704                amdgpu_program_register_sequence(adev,
 705                                                 polaris11_golden_common_all,
 706                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
 707                break;
 708        case CHIP_POLARIS10:
 709                amdgpu_program_register_sequence(adev,
 710                                                 golden_settings_polaris10_a11,
 711                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
 712                amdgpu_program_register_sequence(adev,
 713                                                 polaris10_golden_common_all,
 714                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
 715                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 716                if (adev->pdev->revision == 0xc7 &&
 717                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 718                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 719                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
 720                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 721                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 722                }
 723                break;
 724        case CHIP_CARRIZO:
 725                amdgpu_program_register_sequence(adev,
 726                                                 cz_mgcg_cgcg_init,
 727                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
 728                amdgpu_program_register_sequence(adev,
 729                                                 cz_golden_settings_a11,
 730                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
 731                amdgpu_program_register_sequence(adev,
 732                                                 cz_golden_common_all,
 733                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
 734                break;
 735        case CHIP_STONEY:
 736                amdgpu_program_register_sequence(adev,
 737                                                 stoney_mgcg_cgcg_init,
 738                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
 739                amdgpu_program_register_sequence(adev,
 740                                                 stoney_golden_settings_a11,
 741                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
 742                amdgpu_program_register_sequence(adev,
 743                                                 stoney_golden_common_all,
 744                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
 745                break;
 746        default:
 747                break;
 748        }
 749}
 750
 751static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 752{
 753        adev->gfx.scratch.num_reg = 7;
 754        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 755        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 756}
 757
 758static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 759{
 760        struct amdgpu_device *adev = ring->adev;
 761        uint32_t scratch;
 762        uint32_t tmp = 0;
 763        unsigned i;
 764        int r;
 765
 766        r = amdgpu_gfx_scratch_get(adev, &scratch);
 767        if (r) {
 768                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 769                return r;
 770        }
 771        WREG32(scratch, 0xCAFEDEAD);
 772        r = amdgpu_ring_alloc(ring, 3);
 773        if (r) {
 774                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 775                          ring->idx, r);
 776                amdgpu_gfx_scratch_free(adev, scratch);
 777                return r;
 778        }
 779        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 780        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 781        amdgpu_ring_write(ring, 0xDEADBEEF);
 782        amdgpu_ring_commit(ring);
 783
 784        for (i = 0; i < adev->usec_timeout; i++) {
 785                tmp = RREG32(scratch);
 786                if (tmp == 0xDEADBEEF)
 787                        break;
 788                DRM_UDELAY(1);
 789        }
 790        if (i < adev->usec_timeout) {
 791                DRM_INFO("ring test on %d succeeded in %d usecs\n",
 792                         ring->idx, i);
 793        } else {
 794                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 795                          ring->idx, scratch, tmp);
 796                r = -EINVAL;
 797        }
 798        amdgpu_gfx_scratch_free(adev, scratch);
 799        return r;
 800}
 801
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer execution on @ring
 *
 * Builds a minimal IB containing one SET_UCONFIG_REG packet that writes
 * a magic value to a scratch register, schedules it, waits up to
 * @timeout (jiffies, as taken by dma_fence_wait_timeout()) for its
 * fence, then reads the scratch register back to confirm the CP really
 * executed the IB.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* Seed the scratch register; the IB must overwrite this value. */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* One SET_UCONFIG_REG packet: write 0xDEADBEEF to the scratch reg. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* 0 == timed out, <0 == wait error, >0 == fence signalled. */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	/* Unwind in reverse order of acquisition: IB+fence, then scratch. */
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
 857
 858
 859static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 860{
 861        release_firmware(adev->gfx.pfp_fw);
 862        adev->gfx.pfp_fw = NULL;
 863        release_firmware(adev->gfx.me_fw);
 864        adev->gfx.me_fw = NULL;
 865        release_firmware(adev->gfx.ce_fw);
 866        adev->gfx.ce_fw = NULL;
 867        release_firmware(adev->gfx.rlc_fw);
 868        adev->gfx.rlc_fw = NULL;
 869        release_firmware(adev->gfx.mec_fw);
 870        adev->gfx.mec_fw = NULL;
 871        if ((adev->asic_type != CHIP_STONEY) &&
 872            (adev->asic_type != CHIP_TOPAZ))
 873                release_firmware(adev->gfx.mec2_fw);
 874        adev->gfx.mec2_fw = NULL;
 875
 876        kfree(adev->gfx.rlc.register_list_format);
 877}
 878
 879static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 880{
 881        const char *chip_name;
 882        char fw_name[30];
 883        int err;
 884        struct amdgpu_firmware_info *info = NULL;
 885        const struct common_firmware_header *header = NULL;
 886        const struct gfx_firmware_header_v1_0 *cp_hdr;
 887        const struct rlc_firmware_header_v2_0 *rlc_hdr;
 888        unsigned int *tmp = NULL, i;
 889
 890        DRM_DEBUG("\n");
 891
 892        switch (adev->asic_type) {
 893        case CHIP_TOPAZ:
 894                chip_name = "topaz";
 895                break;
 896        case CHIP_TONGA:
 897                chip_name = "tonga";
 898                break;
 899        case CHIP_CARRIZO:
 900                chip_name = "carrizo";
 901                break;
 902        case CHIP_FIJI:
 903                chip_name = "fiji";
 904                break;
 905        case CHIP_POLARIS11:
 906                chip_name = "polaris11";
 907                break;
 908        case CHIP_POLARIS10:
 909                chip_name = "polaris10";
 910                break;
 911        case CHIP_POLARIS12:
 912                chip_name = "polaris12";
 913                break;
 914        case CHIP_STONEY:
 915                chip_name = "stoney";
 916                break;
 917        default:
 918                BUG();
 919        }
 920
 921        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 922        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 923        if (err)
 924                goto out;
 925        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 926        if (err)
 927                goto out;
 928        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 929        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 930        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 931
 932        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 933        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 934        if (err)
 935                goto out;
 936        err = amdgpu_ucode_validate(adev->gfx.me_fw);
 937        if (err)
 938                goto out;
 939        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 940        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 941
 942        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 943
 944        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 945        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 946        if (err)
 947                goto out;
 948        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 949        if (err)
 950                goto out;
 951        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 952        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 953        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 954
 955        /*
 956         * Support for MCBP/Virtualization in combination with chained IBs is
 957         * formal released on feature version #46
 958         */
 959        if (adev->gfx.ce_feature_version >= 46 &&
 960            adev->gfx.pfp_feature_version >= 46) {
 961                adev->virt.chained_ib_support = true;
 962                DRM_INFO("Chained IB support enabled!\n");
 963        } else
 964                adev->virt.chained_ib_support = false;
 965
 966        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 967        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 968        if (err)
 969                goto out;
 970        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 971        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 972        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 973        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
 974
 975        adev->gfx.rlc.save_and_restore_offset =
 976                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
 977        adev->gfx.rlc.clear_state_descriptor_offset =
 978                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
 979        adev->gfx.rlc.avail_scratch_ram_locations =
 980                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
 981        adev->gfx.rlc.reg_restore_list_size =
 982                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
 983        adev->gfx.rlc.reg_list_format_start =
 984                        le32_to_cpu(rlc_hdr->reg_list_format_start);
 985        adev->gfx.rlc.reg_list_format_separate_start =
 986                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
 987        adev->gfx.rlc.starting_offsets_start =
 988                        le32_to_cpu(rlc_hdr->starting_offsets_start);
 989        adev->gfx.rlc.reg_list_format_size_bytes =
 990                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
 991        adev->gfx.rlc.reg_list_size_bytes =
 992                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
 993
 994        adev->gfx.rlc.register_list_format =
 995                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
 996                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
 997
 998        if (!adev->gfx.rlc.register_list_format) {
 999                err = -ENOMEM;
1000                goto out;
1001        }
1002
1003        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1004                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1005        for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1006                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1007
1008        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1009
1010        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1011                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1012        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1013                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1014
1015        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1016        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1017        if (err)
1018                goto out;
1019        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1020        if (err)
1021                goto out;
1022        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1023        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1025
1026        if ((adev->asic_type != CHIP_STONEY) &&
1027            (adev->asic_type != CHIP_TOPAZ)) {
1028                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1029                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1030                if (!err) {
1031                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1032                        if (err)
1033                                goto out;
1034                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1035                                adev->gfx.mec2_fw->data;
1036                        adev->gfx.mec2_fw_version =
1037                                le32_to_cpu(cp_hdr->header.ucode_version);
1038                        adev->gfx.mec2_feature_version =
1039                                le32_to_cpu(cp_hdr->ucode_feature_version);
1040                } else {
1041                        err = 0;
1042                        adev->gfx.mec2_fw = NULL;
1043                }
1044        }
1045
1046        if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1047                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1048                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1049                info->fw = adev->gfx.pfp_fw;
1050                header = (const struct common_firmware_header *)info->fw->data;
1051                adev->firmware.fw_size +=
1052                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1055                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1056                info->fw = adev->gfx.me_fw;
1057                header = (const struct common_firmware_header *)info->fw->data;
1058                adev->firmware.fw_size +=
1059                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1062                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1063                info->fw = adev->gfx.ce_fw;
1064                header = (const struct common_firmware_header *)info->fw->data;
1065                adev->firmware.fw_size +=
1066                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1067
1068                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1069                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1070                info->fw = adev->gfx.rlc_fw;
1071                header = (const struct common_firmware_header *)info->fw->data;
1072                adev->firmware.fw_size +=
1073                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1074
1075                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1076                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1077                info->fw = adev->gfx.mec_fw;
1078                header = (const struct common_firmware_header *)info->fw->data;
1079                adev->firmware.fw_size +=
1080                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1081
1082                /* we need account JT in */
1083                cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1084                adev->firmware.fw_size +=
1085                        ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1086
1087                if (amdgpu_sriov_vf(adev)) {
1088                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1089                        info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1090                        info->fw = adev->gfx.mec_fw;
1091                        adev->firmware.fw_size +=
1092                                ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1093                }
1094
1095                if (adev->gfx.mec2_fw) {
1096                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1097                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1098                        info->fw = adev->gfx.mec2_fw;
1099                        header = (const struct common_firmware_header *)info->fw->data;
1100                        adev->firmware.fw_size +=
1101                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1102                }
1103
1104        }
1105
1106out:
1107        if (err) {
1108                dev_err(adev->dev,
1109                        "gfx8: Failed to load firmware \"%s\"\n",
1110                        fw_name);
1111                release_firmware(adev->gfx.pfp_fw);
1112                adev->gfx.pfp_fw = NULL;
1113                release_firmware(adev->gfx.me_fw);
1114                adev->gfx.me_fw = NULL;
1115                release_firmware(adev->gfx.ce_fw);
1116                adev->gfx.ce_fw = NULL;
1117                release_firmware(adev->gfx.rlc_fw);
1118                adev->gfx.rlc_fw = NULL;
1119                release_firmware(adev->gfx.mec_fw);
1120                adev->gfx.mec_fw = NULL;
1121                release_firmware(adev->gfx.mec2_fw);
1122                adev->gfx.mec2_fw = NULL;
1123        }
1124        return err;
1125}
1126
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state PM4 stream into @buffer
 *
 * Writes the RLC clear-state buffer: a PREAMBLE begin marker, a
 * CONTEXT_CONTROL packet, every SECT_CONTEXT register extent from
 * adev->gfx.rlc.cs_data, the raster config pair, a PREAMBLE end marker
 * and a final CLEAR_STATE packet.  @buffer must be at least
 * gfx_v8_0_get_csb_size() dwords; all values are stored little-endian.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;	/* count tracks dwords written so far */
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* Only SECT_CONTEXT sections are expected; bail out on anything
	 * else (leaves the buffer truncated — matches get_csb_size). */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* Program the harvested raster configuration for SE0/SH0. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1173
1174static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1175{
1176        const __le32 *fw_data;
1177        volatile u32 *dst_ptr;
1178        int me, i, max_me = 4;
1179        u32 bo_offset = 0;
1180        u32 table_offset, table_size;
1181
1182        if (adev->asic_type == CHIP_CARRIZO)
1183                max_me = 5;
1184
1185        /* write the cp table buffer */
1186        dst_ptr = adev->gfx.rlc.cp_table_ptr;
1187        for (me = 0; me < max_me; me++) {
1188                if (me == 0) {
1189                        const struct gfx_firmware_header_v1_0 *hdr =
1190                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1191                        fw_data = (const __le32 *)
1192                                (adev->gfx.ce_fw->data +
1193                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194                        table_offset = le32_to_cpu(hdr->jt_offset);
1195                        table_size = le32_to_cpu(hdr->jt_size);
1196                } else if (me == 1) {
1197                        const struct gfx_firmware_header_v1_0 *hdr =
1198                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1199                        fw_data = (const __le32 *)
1200                                (adev->gfx.pfp_fw->data +
1201                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202                        table_offset = le32_to_cpu(hdr->jt_offset);
1203                        table_size = le32_to_cpu(hdr->jt_size);
1204                } else if (me == 2) {
1205                        const struct gfx_firmware_header_v1_0 *hdr =
1206                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1207                        fw_data = (const __le32 *)
1208                                (adev->gfx.me_fw->data +
1209                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210                        table_offset = le32_to_cpu(hdr->jt_offset);
1211                        table_size = le32_to_cpu(hdr->jt_size);
1212                } else if (me == 3) {
1213                        const struct gfx_firmware_header_v1_0 *hdr =
1214                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1215                        fw_data = (const __le32 *)
1216                                (adev->gfx.mec_fw->data +
1217                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218                        table_offset = le32_to_cpu(hdr->jt_offset);
1219                        table_size = le32_to_cpu(hdr->jt_size);
1220                } else  if (me == 4) {
1221                        const struct gfx_firmware_header_v1_0 *hdr =
1222                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1223                        fw_data = (const __le32 *)
1224                                (adev->gfx.mec2_fw->data +
1225                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1226                        table_offset = le32_to_cpu(hdr->jt_offset);
1227                        table_size = le32_to_cpu(hdr->jt_size);
1228                }
1229
1230                for (i = 0; i < table_size; i ++) {
1231                        dst_ptr[bo_offset + i] =
1232                                cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1233                }
1234
1235                bo_offset += table_size;
1236        }
1237}
1238
/*
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects
 *
 * Unpins and drops the clear-state BO and the CP jump-table BO.  Safe
 * to call on partially initialised state: each object is only touched
 * if its pointer is non-NULL, so this doubles as the error-unwind path
 * for gfx_v8_0_rlc_init().
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		/* Reserve failure is only warned about; we still unpin and
		 * unref so the object is not leaked. */
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}
1265
1266static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1267{
1268        volatile u32 *dst_ptr;
1269        u32 dws;
1270        const struct cs_section_def *cs_data;
1271        int r;
1272
1273        adev->gfx.rlc.cs_data = vi_cs_data;
1274
1275        cs_data = adev->gfx.rlc.cs_data;
1276
1277        if (cs_data) {
1278                /* clear state block */
1279                adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1280
1281                if (adev->gfx.rlc.clear_state_obj == NULL) {
1282                        r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1283                                             AMDGPU_GEM_DOMAIN_VRAM,
1284                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1285                                             AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1286                                             NULL, NULL,
1287                                             &adev->gfx.rlc.clear_state_obj);
1288                        if (r) {
1289                                dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1290                                gfx_v8_0_rlc_fini(adev);
1291                                return r;
1292                        }
1293                }
1294                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1295                if (unlikely(r != 0)) {
1296                        gfx_v8_0_rlc_fini(adev);
1297                        return r;
1298                }
1299                r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1300                                  &adev->gfx.rlc.clear_state_gpu_addr);
1301                if (r) {
1302                        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1303                        dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1304                        gfx_v8_0_rlc_fini(adev);
1305                        return r;
1306                }
1307
1308                r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1309                if (r) {
1310                        dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1311                        gfx_v8_0_rlc_fini(adev);
1312                        return r;
1313                }
1314                /* set up the cs buffer */
1315                dst_ptr = adev->gfx.rlc.cs_ptr;
1316                gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1317                amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1318                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1319        }
1320
1321        if ((adev->asic_type == CHIP_CARRIZO) ||
1322            (adev->asic_type == CHIP_STONEY)) {
1323                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1324                if (adev->gfx.rlc.cp_table_obj == NULL) {
1325                        r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1326                                             AMDGPU_GEM_DOMAIN_VRAM,
1327                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1328                                             AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1329                                             NULL, NULL,
1330                                             &adev->gfx.rlc.cp_table_obj);
1331                        if (r) {
1332                                dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1333                                return r;
1334                        }
1335                }
1336
1337                r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1338                if (unlikely(r != 0)) {
1339                        dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1340                        return r;
1341                }
1342                r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1343                                  &adev->gfx.rlc.cp_table_gpu_addr);
1344                if (r) {
1345                        amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1346                        dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1347                        return r;
1348                }
1349                r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1350                if (r) {
1351                        dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1352                        return r;
1353                }
1354
1355                cz_init_cp_jump_table(adev);
1356
1357                amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1358                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1359        }
1360
1361        return 0;
1362}
1363
1364static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1365{
1366        int r;
1367
1368        if (adev->gfx.mec.hpd_eop_obj) {
1369                r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
1370                if (unlikely(r != 0))
1371                        dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1372                amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1373                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1374                amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1375                adev->gfx.mec.hpd_eop_obj = NULL;
1376        }
1377}
1378
1379static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1380{
1381        int r;
1382        u32 *hpd;
1383        size_t mec_hpd_size;
1384
1385        bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1386
1387        /* take ownership of the relevant compute queues */
1388        amdgpu_gfx_compute_queue_acquire(adev);
1389
1390        mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1391
1392        if (adev->gfx.mec.hpd_eop_obj == NULL) {
1393                r = amdgpu_bo_create(adev,
1394                                     mec_hpd_size,
1395                                     PAGE_SIZE, true,
1396                                     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1397                                     &adev->gfx.mec.hpd_eop_obj);
1398                if (r) {
1399                        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1400                        return r;
1401                }
1402        }
1403
1404        r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1405        if (unlikely(r != 0)) {
1406                gfx_v8_0_mec_fini(adev);
1407                return r;
1408        }
1409        r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1410                          &adev->gfx.mec.hpd_eop_gpu_addr);
1411        if (r) {
1412                dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1413                gfx_v8_0_mec_fini(adev);
1414                return r;
1415        }
1416        r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1417        if (r) {
1418                dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1419                gfx_v8_0_mec_fini(adev);
1420                return r;
1421        }
1422
1423        memset(hpd, 0, mec_hpd_size);
1424
1425        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1426        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1427
1428        return 0;
1429}
1430
/* Hand-assembled GCN compute shader used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the VGPRs; the raw dwords are copied verbatim into the IB and
 * dispatched (presumably a sequence of v_mov-style writes ending in a
 * waitcnt/endpgm pair — TODO confirm against the GCN3 ISA if ever modified).
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1467
/* Hand-assembled GCN compute shader used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the SGPRs; dispatched twice (SGPR1/SGPR2 passes) with
 * different COMPUTE_STATIC_THREAD_MGMT_SE0 masks from the tables below.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1492
/* Register/value pairs (consumed two at a time by
 * gfx_v8_0_do_edc_gpr_workarounds()) programming the compute dispatch state
 * for the VGPR init pass: all SEs enabled, 256*4 threads in X.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1512
/* Register/value pairs for the first SGPR init dispatch: only the low
 * four SH units of SE0 enabled (mask 0x0f), 256*5 threads in X.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1532
/* Register/value pairs for the second SGPR init dispatch: the complementary
 * SE0 mask (0xf0) so that, together with sgpr1_init_regs, all units are
 * covered; otherwise identical to the first pass.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1552
/* EDC (error detection and correction) counter registers; read back at the
 * end of gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1581
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs so EDC counting is sane
 *
 * Builds a single IB that dispatches three compute shaders (one VGPR init
 * pass and two complementary SGPR init passes), runs it on compute ring 0,
 * waits for completion, then re-enables EDC and reads the SEC/DED counter
 * registers back to clear them.  Carrizo only; a no-op (returning 0) on
 * other ASICs or if the compute ring is not ready.
 *
 * Returns 0 on success or a negative error code from IB alloc/submit or
 * the fence wait.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* IB size: per pass, 3 dwords per reg/value pair, plus 4 for the
	 * PGM_LO/HI write, 5 for the dispatch packet and 2 for the event
	 * write; the shader code is appended after a 256-byte-aligned offset
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 - same shader as SGPR1, complementary SE0 thread mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with double-error detection and error propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1744
1745static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1746{
1747        u32 gb_addr_config;
1748        u32 mc_shared_chmap, mc_arb_ramcfg;
1749        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1750        u32 tmp;
1751        int ret;
1752
1753        switch (adev->asic_type) {
1754        case CHIP_TOPAZ:
1755                adev->gfx.config.max_shader_engines = 1;
1756                adev->gfx.config.max_tile_pipes = 2;
1757                adev->gfx.config.max_cu_per_sh = 6;
1758                adev->gfx.config.max_sh_per_se = 1;
1759                adev->gfx.config.max_backends_per_se = 2;
1760                adev->gfx.config.max_texture_channel_caches = 2;
1761                adev->gfx.config.max_gprs = 256;
1762                adev->gfx.config.max_gs_threads = 32;
1763                adev->gfx.config.max_hw_contexts = 8;
1764
1765                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1766                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1767                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1768                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1769                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1770                break;
1771        case CHIP_FIJI:
1772                adev->gfx.config.max_shader_engines = 4;
1773                adev->gfx.config.max_tile_pipes = 16;
1774                adev->gfx.config.max_cu_per_sh = 16;
1775                adev->gfx.config.max_sh_per_se = 1;
1776                adev->gfx.config.max_backends_per_se = 4;
1777                adev->gfx.config.max_texture_channel_caches = 16;
1778                adev->gfx.config.max_gprs = 256;
1779                adev->gfx.config.max_gs_threads = 32;
1780                adev->gfx.config.max_hw_contexts = 8;
1781
1782                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1783                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1784                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1785                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1786                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1787                break;
1788        case CHIP_POLARIS11:
1789        case CHIP_POLARIS12:
1790                ret = amdgpu_atombios_get_gfx_info(adev);
1791                if (ret)
1792                        return ret;
1793                adev->gfx.config.max_gprs = 256;
1794                adev->gfx.config.max_gs_threads = 32;
1795                adev->gfx.config.max_hw_contexts = 8;
1796
1797                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1802                break;
1803        case CHIP_POLARIS10:
1804                ret = amdgpu_atombios_get_gfx_info(adev);
1805                if (ret)
1806                        return ret;
1807                adev->gfx.config.max_gprs = 256;
1808                adev->gfx.config.max_gs_threads = 32;
1809                adev->gfx.config.max_hw_contexts = 8;
1810
1811                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1816                break;
1817        case CHIP_TONGA:
1818                adev->gfx.config.max_shader_engines = 4;
1819                adev->gfx.config.max_tile_pipes = 8;
1820                adev->gfx.config.max_cu_per_sh = 8;
1821                adev->gfx.config.max_sh_per_se = 1;
1822                adev->gfx.config.max_backends_per_se = 2;
1823                adev->gfx.config.max_texture_channel_caches = 8;
1824                adev->gfx.config.max_gprs = 256;
1825                adev->gfx.config.max_gs_threads = 32;
1826                adev->gfx.config.max_hw_contexts = 8;
1827
1828                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1833                break;
1834        case CHIP_CARRIZO:
1835                adev->gfx.config.max_shader_engines = 1;
1836                adev->gfx.config.max_tile_pipes = 2;
1837                adev->gfx.config.max_sh_per_se = 1;
1838                adev->gfx.config.max_backends_per_se = 2;
1839                adev->gfx.config.max_cu_per_sh = 8;
1840                adev->gfx.config.max_texture_channel_caches = 2;
1841                adev->gfx.config.max_gprs = 256;
1842                adev->gfx.config.max_gs_threads = 32;
1843                adev->gfx.config.max_hw_contexts = 8;
1844
1845                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1850                break;
1851        case CHIP_STONEY:
1852                adev->gfx.config.max_shader_engines = 1;
1853                adev->gfx.config.max_tile_pipes = 2;
1854                adev->gfx.config.max_sh_per_se = 1;
1855                adev->gfx.config.max_backends_per_se = 1;
1856                adev->gfx.config.max_cu_per_sh = 3;
1857                adev->gfx.config.max_texture_channel_caches = 2;
1858                adev->gfx.config.max_gprs = 256;
1859                adev->gfx.config.max_gs_threads = 16;
1860                adev->gfx.config.max_hw_contexts = 8;
1861
1862                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1866                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1867                break;
1868        default:
1869                adev->gfx.config.max_shader_engines = 2;
1870                adev->gfx.config.max_tile_pipes = 4;
1871                adev->gfx.config.max_cu_per_sh = 2;
1872                adev->gfx.config.max_sh_per_se = 1;
1873                adev->gfx.config.max_backends_per_se = 2;
1874                adev->gfx.config.max_texture_channel_caches = 4;
1875                adev->gfx.config.max_gprs = 256;
1876                adev->gfx.config.max_gs_threads = 32;
1877                adev->gfx.config.max_hw_contexts = 8;
1878
1879                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1880                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1881                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1882                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1883                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1884                break;
1885        }
1886
1887        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1888        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1889        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1890
1891        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1892        adev->gfx.config.mem_max_burst_length_bytes = 256;
1893        if (adev->flags & AMD_IS_APU) {
1894                /* Get memory bank mapping mode. */
1895                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1896                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1897                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1898
1899                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1900                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1901                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1902
1903                /* Validate settings in case only one DIMM installed. */
1904                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1905                        dimm00_addr_map = 0;
1906                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1907                        dimm01_addr_map = 0;
1908                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1909                        dimm10_addr_map = 0;
1910                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1911                        dimm11_addr_map = 0;
1912
1913                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1914                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1915                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1916                        adev->gfx.config.mem_row_size_in_kb = 2;
1917                else
1918                        adev->gfx.config.mem_row_size_in_kb = 1;
1919        } else {
1920                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1921                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1922                if (adev->gfx.config.mem_row_size_in_kb > 4)
1923                        adev->gfx.config.mem_row_size_in_kb = 4;
1924        }
1925
1926        adev->gfx.config.shader_engine_tile_size = 32;
1927        adev->gfx.config.num_gpus = 1;
1928        adev->gfx.config.multi_gpu_tile_size = 64;
1929
1930        /* fix up row size */
1931        switch (adev->gfx.config.mem_row_size_in_kb) {
1932        case 1:
1933        default:
1934                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1935                break;
1936        case 2:
1937                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1938                break;
1939        case 4:
1940                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1941                break;
1942        }
1943        adev->gfx.config.gb_addr_config = gb_addr_config;
1944
1945        return 0;
1946}
1947
1948static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1949                                        int mec, int pipe, int queue)
1950{
1951        int r;
1952        unsigned irq_type;
1953        struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1954
1955        ring = &adev->gfx.compute_ring[ring_id];
1956
1957        /* mec0 is me1 */
1958        ring->me = mec + 1;
1959        ring->pipe = pipe;
1960        ring->queue = queue;
1961
1962        ring->ring_obj = NULL;
1963        ring->use_doorbell = true;
1964        ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1965        ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1966                                + (ring_id * GFX8_MEC_HPD_SIZE);
1967        sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1968
1969        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1970                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1971                + ring->pipe;
1972
1973        /* type-2 packets are deprecated on MEC, use type-3 instead */
1974        r = amdgpu_ring_init(adev, ring, 1024,
1975                        &adev->gfx.eop_irq, irq_type);
1976        if (r)
1977                return r;
1978
1979
1980        return 0;
1981}
1982
/**
 * gfx_v8_0_sw_init - software init for the GFX v8 IP block
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Registers the KIQ/EOP/privileged-op interrupt sources, loads microcode,
 * allocates the RLC, MEC, KIQ and MQD buffer objects, creates the gfx and
 * compute rings, and reserves the GDS/GWS/OA partitions.  Teardown is
 * performed by gfx_v8_0_sw_fini() in roughly the reverse order.
 *
 * Returns 0 on success, negative error code on the first failing step
 * (already-allocated resources are left for sw_fini to release).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs (compute micro engines) varies per ASIC */
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues reserved away from the gfx driver */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* constant-engine RAM size for this generation */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2133
2134static int gfx_v8_0_sw_fini(void *handle)
2135{
2136        int i;
2137        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2138
2139        amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2140        amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2141        amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2142
2143        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2144                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2145        for (i = 0; i < adev->gfx.num_compute_rings; i++)
2146                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2147
2148        amdgpu_gfx_compute_mqd_sw_fini(adev);
2149        amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2150        amdgpu_gfx_kiq_fini(adev);
2151
2152        gfx_v8_0_mec_fini(adev);
2153        gfx_v8_0_rlc_fini(adev);
2154        gfx_v8_0_free_microcode(adev);
2155
2156        return 0;
2157}
2158
2159static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2160{
2161        uint32_t *modearray, *mod2array;
2162        const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2163        const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2164        u32 reg_offset;
2165
2166        modearray = adev->gfx.config.tile_mode_array;
2167        mod2array = adev->gfx.config.macrotile_mode_array;
2168
2169        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2170                modearray[reg_offset] = 0;
2171
2172        for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2173                mod2array[reg_offset] = 0;
2174
2175        switch (adev->asic_type) {
2176        case CHIP_TOPAZ:
2177                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2178                                PIPE_CONFIG(ADDR_SURF_P2) |
2179                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2180                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2181                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182                                PIPE_CONFIG(ADDR_SURF_P2) |
2183                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2184                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2185                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2186                                PIPE_CONFIG(ADDR_SURF_P2) |
2187                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2188                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2189                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190                                PIPE_CONFIG(ADDR_SURF_P2) |
2191                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2192                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2193                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194                                PIPE_CONFIG(ADDR_SURF_P2) |
2195                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2196                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2197                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2198                                PIPE_CONFIG(ADDR_SURF_P2) |
2199                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2200                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2201                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2202                                PIPE_CONFIG(ADDR_SURF_P2) |
2203                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2204                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2205                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2206                                PIPE_CONFIG(ADDR_SURF_P2));
2207                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2208                                PIPE_CONFIG(ADDR_SURF_P2) |
2209                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2210                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2211                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212                                 PIPE_CONFIG(ADDR_SURF_P2) |
2213                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2214                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2215                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2216                                 PIPE_CONFIG(ADDR_SURF_P2) |
2217                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2218                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2219                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2220                                 PIPE_CONFIG(ADDR_SURF_P2) |
2221                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2222                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2223                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224                                 PIPE_CONFIG(ADDR_SURF_P2) |
2225                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2226                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2227                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2228                                 PIPE_CONFIG(ADDR_SURF_P2) |
2229                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2230                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2232                                 PIPE_CONFIG(ADDR_SURF_P2) |
2233                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2234                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2235                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2236                                 PIPE_CONFIG(ADDR_SURF_P2) |
2237                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2238                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2239                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2240                                 PIPE_CONFIG(ADDR_SURF_P2) |
2241                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2242                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2243                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2244                                 PIPE_CONFIG(ADDR_SURF_P2) |
2245                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2246                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2247                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2248                                 PIPE_CONFIG(ADDR_SURF_P2) |
2249                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2250                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2251                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2252                                 PIPE_CONFIG(ADDR_SURF_P2) |
2253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2254                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2255                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2256                                 PIPE_CONFIG(ADDR_SURF_P2) |
2257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2258                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2259                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2260                                 PIPE_CONFIG(ADDR_SURF_P2) |
2261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2262                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2263                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2264                                 PIPE_CONFIG(ADDR_SURF_P2) |
2265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2266                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2267                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2268                                 PIPE_CONFIG(ADDR_SURF_P2) |
2269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2270                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2271                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272                                 PIPE_CONFIG(ADDR_SURF_P2) |
2273                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2274                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2275                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2276                                 PIPE_CONFIG(ADDR_SURF_P2) |
2277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2278                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2279
2280                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2281                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2282                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2283                                NUM_BANKS(ADDR_SURF_8_BANK));
2284                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2285                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2287                                NUM_BANKS(ADDR_SURF_8_BANK));
2288                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2291                                NUM_BANKS(ADDR_SURF_8_BANK));
2292                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2293                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2294                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295                                NUM_BANKS(ADDR_SURF_8_BANK));
2296                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2298                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2299                                NUM_BANKS(ADDR_SURF_8_BANK));
2300                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2302                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2303                                NUM_BANKS(ADDR_SURF_8_BANK));
2304                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2307                                NUM_BANKS(ADDR_SURF_8_BANK));
2308                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2309                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2310                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2311                                NUM_BANKS(ADDR_SURF_16_BANK));
2312                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2313                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2314                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2315                                NUM_BANKS(ADDR_SURF_16_BANK));
2316                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2317                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2318                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2319                                 NUM_BANKS(ADDR_SURF_16_BANK));
2320                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2323                                 NUM_BANKS(ADDR_SURF_16_BANK));
2324                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2327                                 NUM_BANKS(ADDR_SURF_16_BANK));
2328                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2331                                 NUM_BANKS(ADDR_SURF_16_BANK));
2332                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2335                                 NUM_BANKS(ADDR_SURF_8_BANK));
2336
2337                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2338                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2339                            reg_offset != 23)
2340                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2341
2342                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2343                        if (reg_offset != 7)
2344                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2345
2346                break;
2347        case CHIP_FIJI:
2348                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2351                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2352                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2355                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2359                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2360                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2363                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2364                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2365                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2367                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2368                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2371                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2372                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2375                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2376                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2378                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2379                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2381                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2382                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2383                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2389                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2393                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2394                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2395                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2398                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2405                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2407                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2409                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2414                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2417                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2418                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2419                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2421                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2422                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2423                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2425                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2426                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2427                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2429                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2430                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2431                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2433                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2434                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2435                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2437                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2438                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2439                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2440                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2441                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2442                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2443                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2445                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2446                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2447                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2449                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2450                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2451                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2453                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2454                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2455                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2462                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2464                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2469                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2470
2471                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2473                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2474                                NUM_BANKS(ADDR_SURF_8_BANK));
2475                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2477                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2478                                NUM_BANKS(ADDR_SURF_8_BANK));
2479                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482                                NUM_BANKS(ADDR_SURF_8_BANK));
2483                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2485                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2486                                NUM_BANKS(ADDR_SURF_8_BANK));
                /*
                 * NOTE(review): this is the tail of the previous chip's
                 * case arm (its opening lies before this excerpt).
                 * mod2array[] holds the GB_MACROTILE_MODEn register
                 * values; entry 7 is deliberately never initialised and
                 * the programming loop below skips it.
                 */
2487                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2489                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2490                                NUM_BANKS(ADDR_SURF_8_BANK));
2491                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2494                                NUM_BANKS(ADDR_SURF_8_BANK));
2495                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2498                                NUM_BANKS(ADDR_SURF_8_BANK));
                /* Entry 7 intentionally skipped (left unprogrammed). */
2499                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2501                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2502                                NUM_BANKS(ADDR_SURF_8_BANK));
2503                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506                                NUM_BANKS(ADDR_SURF_8_BANK));
2507                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2509                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2510                                 NUM_BANKS(ADDR_SURF_8_BANK));
2511                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2514                                 NUM_BANKS(ADDR_SURF_8_BANK));
2515                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2517                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518                                 NUM_BANKS(ADDR_SURF_8_BANK));
2519                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522                                 NUM_BANKS(ADDR_SURF_8_BANK));
2523                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526                                 NUM_BANKS(ADDR_SURF_4_BANK));
2527
                /* Program every GB_TILE_MODEn register from modearray[]. */
2528                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2529                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2530
                /* Program GB_MACROTILE_MODEn, leaving register 7 untouched. */
2531                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2532                        if (reg_offset != 7)
2533                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2534
2535                break;
2536        case CHIP_TONGA:
                /*
                 * Tonga tiling tables.  The PIPE_CONFIG fields use the
                 * 8-pipe ADDR_SURF_P8_32x32_16x16 layout (P4_16x16 only
                 * for the PRT fallback entries).  modearray[] feeds
                 * GB_TILE_MODE0..n, mod2array[] feeds
                 * GB_MACROTILE_MODE0..n; mod2array entry 7 is
                 * intentionally left unset and skipped when programming.
                 */
2537                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2540                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2541                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2544                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2545                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2548                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2549                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2552                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2553                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2556                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2557                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2560                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2561                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2564                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2565                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2566                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2567                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2568                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2569                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2570                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2571                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2572                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2578                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2583                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2586                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2587                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2590                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2594                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2596                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2598                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2600                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2602                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2603                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2606                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2607                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2608                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2610                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2611                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2612                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2614                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2615                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2616                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2618                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2619                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2620                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2622                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2623                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2624                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2626                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2627                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2628                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2630                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2631                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2632                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2634                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2635                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2636                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2638                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2639                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2640                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2642                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2643                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2644                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2646                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2650                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2653                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2656                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2658                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2659
                /* Macrotile (bank) settings; entry 7 intentionally unset. */
2660                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2662                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2663                                NUM_BANKS(ADDR_SURF_16_BANK));
2664                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2666                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2667                                NUM_BANKS(ADDR_SURF_16_BANK));
2668                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671                                NUM_BANKS(ADDR_SURF_16_BANK));
2672                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2674                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2675                                NUM_BANKS(ADDR_SURF_16_BANK));
2676                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2678                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2679                                NUM_BANKS(ADDR_SURF_16_BANK));
2680                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2683                                NUM_BANKS(ADDR_SURF_16_BANK));
2684                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687                                NUM_BANKS(ADDR_SURF_16_BANK));
2688                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2690                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2691                                NUM_BANKS(ADDR_SURF_16_BANK));
2692                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2694                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2695                                NUM_BANKS(ADDR_SURF_16_BANK));
2696                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2699                                 NUM_BANKS(ADDR_SURF_16_BANK));
2700                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2702                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2703                                 NUM_BANKS(ADDR_SURF_16_BANK));
2704                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2705                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2706                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2707                                 NUM_BANKS(ADDR_SURF_8_BANK));
2708                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2711                                 NUM_BANKS(ADDR_SURF_4_BANK));
2712                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2714                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2715                                 NUM_BANKS(ADDR_SURF_4_BANK));
2716
                /* Program every GB_TILE_MODEn register from modearray[]. */
2717                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2718                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2719
                /* Program GB_MACROTILE_MODEn, leaving register 7 untouched. */
2720                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2721                        if (reg_offset != 7)
2722                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2723
2724                break;
2725        case CHIP_POLARIS11:
2726        case CHIP_POLARIS12:
                /*
                 * Polaris11/Polaris12 share one tiling configuration.
                 * All PIPE_CONFIG fields here use the 4-pipe
                 * ADDR_SURF_P4_16x16 layout.  modearray[] feeds
                 * GB_TILE_MODE0..n, mod2array[] feeds
                 * GB_MACROTILE_MODE0..n; mod2array entry 7 is
                 * intentionally left unset and skipped when programming.
                 */
2727                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2728                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2730                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2731                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2734                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2735                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2738                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2739                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2742                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2743                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2746                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2748                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2750                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2751                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2754                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2755                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2756                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2758                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2759                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2760                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
2761                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2762                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2764                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2765                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2768                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2769                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2770                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2772                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2773                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2776                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2777                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2780                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2784                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2786                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2788                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2789                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2790                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2792                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2793                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2794                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2796                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2797                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2798                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2800                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2801                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2802                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2804                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2805                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2806                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2808                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2809                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2810                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2812                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2813                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2814                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2816                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2817                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2818                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2820                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2821                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2822                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2824                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2825                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2826                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2828                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2829                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2830                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2832                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2833                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2836                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2840                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2844                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2845                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2846                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2848                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2849
                /* Macrotile (bank) settings; entry 7 intentionally unset. */
2850                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2852                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2853                                NUM_BANKS(ADDR_SURF_16_BANK));
2854
2855                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2857                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2858                                NUM_BANKS(ADDR_SURF_16_BANK));
2859
2860                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                NUM_BANKS(ADDR_SURF_16_BANK));
2864
2865                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2867                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2868                                NUM_BANKS(ADDR_SURF_16_BANK));
2869
2870                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2872                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2873                                NUM_BANKS(ADDR_SURF_16_BANK));
2874
2875                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2877                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2878                                NUM_BANKS(ADDR_SURF_16_BANK));
2879
2880                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883                                NUM_BANKS(ADDR_SURF_16_BANK));
2884
2885                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2886                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2887                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2888                                NUM_BANKS(ADDR_SURF_16_BANK));
2889
2890                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2891                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2892                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2893                                NUM_BANKS(ADDR_SURF_16_BANK));
2894
2895                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2897                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898                                NUM_BANKS(ADDR_SURF_16_BANK));
2899
2900                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2902                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2903                                NUM_BANKS(ADDR_SURF_16_BANK));
2904
2905                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2907                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2908                                NUM_BANKS(ADDR_SURF_16_BANK));
2909
2910                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2913                                NUM_BANKS(ADDR_SURF_8_BANK));
2914
2915                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2917                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2918                                NUM_BANKS(ADDR_SURF_4_BANK));
2919
                /* Program every GB_TILE_MODEn register from modearray[]. */
2920                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2921                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2922
                /* Program GB_MACROTILE_MODEn, leaving register 7 untouched. */
2923                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2924                        if (reg_offset != 7)
2925                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2926
2927                break;
        case CHIP_POLARIS10:
                /*
                 * GB_TILE_MODE0..30 encodings for Polaris 10.  Every entry
                 * uses the ADDR_SURF_P8_32x32_16x16 pipe config except the
                 * PRT entries 7, 12, 17, 23 and 30, which use
                 * ADDR_SURF_P4_16x16.  Layout: 0-7 depth micro tiling with
                 * increasing tile splits, 8 linear aligned, 9-12 display,
                 * 13-17 thin, 18-26 thick/xthick, 27-30 rotated.
                 */
                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

                /*
                 * GB_MACROTILE_MODE0..14 bank-geometry encodings.  Index 7
                 * is never initialized here and is skipped by the write
                 * loop below.
                 */
                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_16_BANK));

                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_8_BANK));

                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_4_BANK));

                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                NUM_BANKS(ADDR_SURF_4_BANK));

                /* Program every GB_TILE_MODE register (all 31 entries set). */
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

                /* Program GB_MACROTILE_MODE registers, skipping index 7. */
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                        if (reg_offset != 7)
                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

                break;
        case CHIP_STONEY:
                /*
                 * GB_TILE_MODE encodings for Stoney; all entries use the
                 * ADDR_SURF_P2 pipe config.  Indices 7, 12, 17 and 23 are
                 * never initialized here and are skipped by the write loop
                 * below.  Layout: 0-6 depth micro tiling with increasing
                 * tile splits, 8 linear aligned, 9-11 display, 13-16 thin,
                 * 18-26 thick/xthick, 27-29 rotated.
                 */
                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                PIPE_CONFIG(ADDR_SURF_P2));
                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                 PIPE_CONFIG(ADDR_SURF_P2) |
                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

                /*
                 * GB_MACROTILE_MODE0..14 bank-geometry encodings.  Index 7
                 * is never initialized here and is skipped by the write
                 * loop below.
                 */
                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                NUM_BANKS(ADDR_SURF_8_BANK));
                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                 NUM_BANKS(ADDR_SURF_16_BANK));
                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                 NUM_BANKS(ADDR_SURF_8_BANK));

                /* Program GB_TILE_MODE registers, skipping the entries
                 * (7, 12, 17, 23) that were not initialized above. */
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
                            reg_offset != 23)
                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

                /* Program GB_MACROTILE_MODE registers, skipping index 7. */
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                        if (reg_offset != 7)
                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

                break;
        default:
                dev_warn(adev->dev,
                         "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
                         adev->asic_type);
                /* fall through - unrecognized ASICs reuse the CHIP_CARRIZO tables */

3306        case CHIP_CARRIZO:
3307                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308                                PIPE_CONFIG(ADDR_SURF_P2) |
3309                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3310                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3311                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312                                PIPE_CONFIG(ADDR_SURF_P2) |
3313                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3314                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3315                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3316                                PIPE_CONFIG(ADDR_SURF_P2) |
3317                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3318                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3319                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320                                PIPE_CONFIG(ADDR_SURF_P2) |
3321                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3322                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3323                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3324                                PIPE_CONFIG(ADDR_SURF_P2) |
3325                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3326                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3327                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3328                                PIPE_CONFIG(ADDR_SURF_P2) |
3329                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3330                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3331                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3332                                PIPE_CONFIG(ADDR_SURF_P2) |
3333                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3334                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3335                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3336                                PIPE_CONFIG(ADDR_SURF_P2));
3337                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3338                                PIPE_CONFIG(ADDR_SURF_P2) |
3339                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3340                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3341                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342                                 PIPE_CONFIG(ADDR_SURF_P2) |
3343                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3344                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3345                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3346                                 PIPE_CONFIG(ADDR_SURF_P2) |
3347                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3348                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3349                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3350                                 PIPE_CONFIG(ADDR_SURF_P2) |
3351                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3352                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3353                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354                                 PIPE_CONFIG(ADDR_SURF_P2) |
3355                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3356                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3357                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3358                                 PIPE_CONFIG(ADDR_SURF_P2) |
3359                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3361                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362                                 PIPE_CONFIG(ADDR_SURF_P2) |
3363                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3364                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3365                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3366                                 PIPE_CONFIG(ADDR_SURF_P2) |
3367                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3368                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3370                                 PIPE_CONFIG(ADDR_SURF_P2) |
3371                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3372                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3373                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3374                                 PIPE_CONFIG(ADDR_SURF_P2) |
3375                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3376                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3377                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3378                                 PIPE_CONFIG(ADDR_SURF_P2) |
3379                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3380                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3381                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3382                                 PIPE_CONFIG(ADDR_SURF_P2) |
3383                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3384                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3385                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3386                                 PIPE_CONFIG(ADDR_SURF_P2) |
3387                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3388                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3389                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3390                                 PIPE_CONFIG(ADDR_SURF_P2) |
3391                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3392                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3393                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3394                                 PIPE_CONFIG(ADDR_SURF_P2) |
3395                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3396                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3397                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3398                                 PIPE_CONFIG(ADDR_SURF_P2) |
3399                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3400                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3401                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402                                 PIPE_CONFIG(ADDR_SURF_P2) |
3403                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3404                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3405                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3406                                 PIPE_CONFIG(ADDR_SURF_P2) |
3407                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3408                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3409
3410                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3411                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3412                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413                                NUM_BANKS(ADDR_SURF_8_BANK));
3414                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3415                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3416                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417                                NUM_BANKS(ADDR_SURF_8_BANK));
3418                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3419                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3420                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3421                                NUM_BANKS(ADDR_SURF_8_BANK));
3422                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3423                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3424                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3425                                NUM_BANKS(ADDR_SURF_8_BANK));
3426                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3428                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3429                                NUM_BANKS(ADDR_SURF_8_BANK));
3430                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3433                                NUM_BANKS(ADDR_SURF_8_BANK));
3434                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437                                NUM_BANKS(ADDR_SURF_8_BANK));
3438                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3439                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3440                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3441                                NUM_BANKS(ADDR_SURF_16_BANK));
3442                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3443                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3444                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3445                                NUM_BANKS(ADDR_SURF_16_BANK));
3446                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3449                                 NUM_BANKS(ADDR_SURF_16_BANK));
3450                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3453                                 NUM_BANKS(ADDR_SURF_16_BANK));
3454                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3457                                 NUM_BANKS(ADDR_SURF_16_BANK));
3458                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3461                                 NUM_BANKS(ADDR_SURF_16_BANK));
3462                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3465                                 NUM_BANKS(ADDR_SURF_8_BANK));
3466
3467                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3468                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3469                            reg_offset != 23)
3470                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3471
3472                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3473                        if (reg_offset != 7)
3474                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3475
3476                break;
3477        }
3478}
3479
3480static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3481                                  u32 se_num, u32 sh_num, u32 instance)
3482{
3483        u32 data;
3484
3485        if (instance == 0xffffffff)
3486                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3487        else
3488                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3489
3490        if (se_num == 0xffffffff)
3491                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3492        else
3493                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3494
3495        if (sh_num == 0xffffffff)
3496                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3497        else
3498                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3499
3500        WREG32(mmGRBM_GFX_INDEX, data);
3501}
3502
3503static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3504{
3505        u32 data, mask;
3506
3507        data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3508                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3509
3510        data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3511
3512        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3513                                         adev->gfx.config.max_sh_per_se);
3514
3515        return (~data) & mask;
3516}
3517
3518static void
3519gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3520{
3521        switch (adev->asic_type) {
3522        case CHIP_FIJI:
3523                *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3524                          RB_XSEL2(1) | PKR_MAP(2) |
3525                          PKR_XSEL(1) | PKR_YSEL(1) |
3526                          SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3527                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3528                           SE_PAIR_YSEL(2);
3529                break;
3530        case CHIP_TONGA:
3531        case CHIP_POLARIS10:
3532                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3533                          SE_XSEL(1) | SE_YSEL(1);
3534                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3535                           SE_PAIR_YSEL(2);
3536                break;
3537        case CHIP_TOPAZ:
3538        case CHIP_CARRIZO:
3539                *rconf |= RB_MAP_PKR0(2);
3540                *rconf1 |= 0x0;
3541                break;
3542        case CHIP_POLARIS11:
3543        case CHIP_POLARIS12:
3544                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3545                          SE_XSEL(1) | SE_YSEL(1);
3546                *rconf1 |= 0x0;
3547                break;
3548        case CHIP_STONEY:
3549                *rconf |= 0x0;
3550                *rconf1 |= 0x0;
3551                break;
3552        default:
3553                DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3554                break;
3555        }
3556}
3557
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config for
 * harvested (partially disabled) RB configurations
 *
 * @adev: amdgpu_device pointer
 * @raster_config: baseline PA_SC_RASTER_CONFIG value to adjust per SE
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1 value (adjusted globally)
 * @rb_mask: bitmap of render backends that are actually enabled
 * @num_rb: total number of RBs the config claims (enabled or not)
 *
 * When some RBs are fused off, the default raster mapping would route
 * work to dead units.  This walks each shader engine and rewrites the
 * SE/PKR/RB map fields so that rasterization only targets RBs present
 * in @rb_mask, then writes the result per-SE via GRBM_GFX_INDEX.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	/* clamp to at least 1 to avoid div-by-zero on odd configs */
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* per-SE slices of rb_mask: SE n owns bits [n*rb_per_se, (n+1)*rb_per_se) */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* With 4 SEs, if an entire SE pair is dead, point SE_PAIR_MAP at
	 * the surviving pair.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx selects the SE pair this SE belongs to (0 or 2) */
		int idx = (se / 2) * 2;

		/* If one SE of the pair is fully harvested, remap SE_MAP
		 * to the live one.
		 */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: remap PKR_MAP if one packer of
		 * this SE has no live RBs.
		 */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally remap the RB maps within each packer when one of
		 * the two RBs is dead.
		 */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* second packer's RBs start rb_per_pkr bits in */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3666
/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * raster configuration
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH, reads the active-RB bitmap, then writes either the
 * per-ASIC default raster config (no harvesting) or a corrected config
 * that avoids fused-off RBs.  Also caches the per-SE/SH register values
 * for later reporting to userspace.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* serialize all GRBM_GFX_INDEX-steered accesses */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's bitmap into its slot of the global map */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* fewer active RBs than pipes means some were harvested and the
	 * default mapping must be corrected
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3723
3724/**
3725 * gfx_v8_0_init_compute_vmid - gart enable
3726 *
3727 * @adev: amdgpu_device pointer
3728 *
3729 * Initialize compute vmid sh_mem registers
3730 *
3731 */
3732#define DEFAULT_SH_MEM_BASES    (0x6000)
3733#define FIRST_COMPUTE_VMID      (8)
3734#define LAST_COMPUTE_VMID       (16)
3735static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3736{
3737        int i;
3738        uint32_t sh_mem_config;
3739        uint32_t sh_mem_bases;
3740
3741        /*
3742         * Configure apertures:
3743         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3744         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3745         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3746         */
3747        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3748
3749        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3750                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3751                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3752                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3753                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3754                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3755
3756        mutex_lock(&adev->srbm_mutex);
3757        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3758                vi_srbm_select(adev, 0, 0, 0, i);
3759                /* CP and shaders */
3760                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3761                WREG32(mmSH_MEM_APE1_BASE, 1);
3762                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3763                WREG32(mmSH_MEM_BASES, sh_mem_bases);
3764        }
3765        vi_srbm_select(adev, 0, 0, 0, 0);
3766        mutex_unlock(&adev->srbm_mutex);
3767}
3768
3769static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3770{
3771        switch (adev->asic_type) {
3772        default:
3773                adev->gfx.config.double_offchip_lds_buf = 1;
3774                break;
3775        case CHIP_CARRIZO:
3776        case CHIP_STONEY:
3777                adev->gfx.config.double_offchip_lds_buf = 0;
3778                break;
3779        }
3780}
3781
/*
 * gfx_v8_0_gpu_init - one-time GFX engine setup
 *
 * @adev: amdgpu_device pointer
 *
 * Programs addressing config, tiling tables, RB/raster setup, per-VMID
 * SH_MEM apertures, compute VMIDs, and the scan converter FIFO sizes.
 * Called once during hw init; ordering of the steps matters.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* the same gb_addr_config value feeds GB, HDP and DMIF */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel): uncached default mtype, base 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: non-coherent default mtype and the
			 * shared aperture base (top 16 bits of the address)
			 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* give all four pipe order timestamps equal (2) arbitration priority */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3862
3863static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3864{
3865        u32 i, j, k;
3866        u32 mask;
3867
3868        mutex_lock(&adev->grbm_idx_mutex);
3869        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3870                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3871                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3872                        for (k = 0; k < adev->usec_timeout; k++) {
3873                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3874                                        break;
3875                                udelay(1);
3876                        }
3877                }
3878        }
3879        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3880        mutex_unlock(&adev->grbm_idx_mutex);
3881
3882        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3883                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3884                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3885                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3886        for (k = 0; k < adev->usec_timeout; k++) {
3887                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3888                        break;
3889                udelay(1);
3890        }
3891}
3892
3893static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3894                                               bool enable)
3895{
3896        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3897
3898        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3899        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3900        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3901        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3902
3903        WREG32(mmCP_INT_CNTL_RING0, tmp);
3904}
3905
3906static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3907{
3908        /* csib */
3909        WREG32(mmRLC_CSIB_ADDR_HI,
3910                        adev->gfx.rlc.clear_state_gpu_addr >> 32);
3911        WREG32(mmRLC_CSIB_ADDR_LO,
3912                        adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3913        WREG32(mmRLC_CSIB_LENGTH,
3914                        adev->gfx.rlc.clear_state_size);
3915}
3916
/*
 * gfx_v8_0_parse_ind_reg_list - parse the RLC indirect register list and
 * deduplicate the index registers it references
 *
 * @register_list_format: the list to parse; rewritten in place so that
 *	each index-register dword is replaced by its slot in
 *	@unique_indices
 * @ind_offset: dword offset at which parsing starts
 * @list_size: total size of the list, in dwords
 * @unique_indices: out array collecting each distinct index register
 * @indices_count: in/out count of entries used in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: out array recording the start offset of each entry
 * @offset_count: in/out count of entries used in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is a sequence of entries terminated by 0xFFFFFFFF markers.
 * Within an entry, items are consumed three dwords at a time (the loop's
 * own ind_offset++ plus the explicit += 2 below): the third dword is
 * treated as an index-register number and deduplicated.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* record where this entry begins */
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			/* NOTE(review): fires when the array becomes exactly
			 * full even though the last write was in bounds, so
			 * one slot is effectively reserved — confirm intended
			 */
			BUG_ON(*offset_count >= max_offset);
		}

		/* 0xFFFFFFFF terminates the current entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the index-register dword of this triple */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append to the unique list */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the register number with its unique-index slot */
		register_list_format[ind_offset] = indices;
	}
}
3966
3967static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3968{
3969        int i, temp, data;
3970        int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3971        int indices_count = 0;
3972        int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3973        int offset_count = 0;
3974
3975        int list_size;
3976        unsigned int *register_list_format =
3977                kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3978        if (!register_list_format)
3979                return -ENOMEM;
3980        memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3981                        adev->gfx.rlc.reg_list_format_size_bytes);
3982
3983        gfx_v8_0_parse_ind_reg_list(register_list_format,
3984                                RLC_FormatDirectRegListLength,
3985                                adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3986                                unique_indices,
3987                                &indices_count,
3988                                sizeof(unique_indices) / sizeof(int),
3989                                indirect_start_offsets,
3990                                &offset_count,
3991                                sizeof(indirect_start_offsets)/sizeof(int));
3992
3993        /* save and restore list */
3994        WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3995
3996        WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3997        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3998                WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3999
4000        /* indirect list */
4001        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4002        for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4003                WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4004
4005        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4006        list_size = list_size >> 1;
4007        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4008        WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4009
4010        /* starting offsets starts */
4011        WREG32(mmRLC_GPM_SCRATCH_ADDR,
4012                adev->gfx.rlc.starting_offsets_start);
4013        for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
4014                WREG32(mmRLC_GPM_SCRATCH_DATA,
4015                                indirect_start_offsets[i]);
4016
4017        /* unique indices */
4018        temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4019        data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4020        for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
4021                if (unique_indices[i] != 0) {
4022                        WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4023                        WREG32(data + i, unique_indices[i] >> 20);
4024                }
4025        }
4026        kfree(register_list_format);
4027
4028        return 0;
4029}
4030
/* Turn on the RLC save/restore machine (SRM) */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4035
4036static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4037{
4038        uint32_t data;
4039
4040        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4041
4042        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4043        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4044        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4045        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4046        WREG32(mmRLC_PG_DELAY, data);
4047
4048        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4049        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4050
4051}
4052
/* Enable/disable SMU clock slowdown while powering up */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4058
/* Enable/disable SMU clock slowdown while powering down */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4064
/* Enable/disable CP power gating.  Note the register field is a
 * *disable* bit, so the polarity is inverted here on purpose.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4069
4070static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4071{
4072        if ((adev->asic_type == CHIP_CARRIZO) ||
4073            (adev->asic_type == CHIP_STONEY)) {
4074                gfx_v8_0_init_csb(adev);
4075                gfx_v8_0_init_save_restore_list(adev);
4076                gfx_v8_0_enable_save_restore_machine(adev);
4077                WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4078                gfx_v8_0_init_power_gating(adev);
4079                WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4080        } else if ((adev->asic_type == CHIP_POLARIS11) ||
4081                   (adev->asic_type == CHIP_POLARIS12)) {
4082                gfx_v8_0_init_csb(adev);
4083                gfx_v8_0_init_save_restore_list(adev);
4084                gfx_v8_0_enable_save_restore_machine(adev);
4085                gfx_v8_0_init_power_gating(adev);
4086        }
4087
4088}
4089
/* Halt the RLC F32 micro-engine, mask GUI-idle interrupts, then wait
 * for the RLC serdes traffic to drain so the RLC is fully quiesced.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4097
/* Pulse the GRBM soft-reset line for the RLC.  The 50us delays give the
 * hardware time to latch the reset assertion and de-assertion.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4106
/* Re-enable the RLC F32 micro-engine.  On discrete parts the GUI-idle
 * interrupt is unmasked here; APUs do that only after CP init (see the
 * comment below).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4117
4118static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4119{
4120        const struct rlc_firmware_header_v2_0 *hdr;
4121        const __le32 *fw_data;
4122        unsigned i, fw_size;
4123
4124        if (!adev->gfx.rlc_fw)
4125                return -EINVAL;
4126
4127        hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4128        amdgpu_ucode_print_rlc_hdr(&hdr->header);
4129
4130        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4131                           le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4132        fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4133
4134        WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4135        for (i = 0; i < fw_size; i++)
4136                WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4137        WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4138
4139        return 0;
4140}
4141
/* Full RLC bring-up sequence: stop the RLC, disable CGCG/CGLS clock
 * gating (plus the 3D variants on Polaris), disable power gating, reset
 * the RLC, re-run PG init, load the microcode if firmware loading is not
 * delegated to the SMU/powerplay, and finally start the RLC.
 *
 * Returns 0 on success or a negative error code if microcode
 * loading/verification failed.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also has a 3D CGCG/CGLS control; clear its low
		 * two enable bits the same way */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4186
4187static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4188{
4189        int i;
4190        u32 tmp = RREG32(mmCP_ME_CNTL);
4191
4192        if (enable) {
4193                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4194                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4195                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4196        } else {
4197                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4198                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4199                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4200                for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4201                        adev->gfx.gfx_ring[i].ready = false;
4202        }
4203        WREG32(mmCP_ME_CNTL, tmp);
4204        udelay(50);
4205}
4206
4207static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4208{
4209        const struct gfx_firmware_header_v1_0 *pfp_hdr;
4210        const struct gfx_firmware_header_v1_0 *ce_hdr;
4211        const struct gfx_firmware_header_v1_0 *me_hdr;
4212        const __le32 *fw_data;
4213        unsigned i, fw_size;
4214
4215        if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4216                return -EINVAL;
4217
4218        pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4219                adev->gfx.pfp_fw->data;
4220        ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4221                adev->gfx.ce_fw->data;
4222        me_hdr = (const struct gfx_firmware_header_v1_0 *)
4223                adev->gfx.me_fw->data;
4224
4225        amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4226        amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4227        amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4228
4229        gfx_v8_0_cp_gfx_enable(adev, false);
4230
4231        /* PFP */
4232        fw_data = (const __le32 *)
4233                (adev->gfx.pfp_fw->data +
4234                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4235        fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4236        WREG32(mmCP_PFP_UCODE_ADDR, 0);
4237        for (i = 0; i < fw_size; i++)
4238                WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4239        WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4240
4241        /* CE */
4242        fw_data = (const __le32 *)
4243                (adev->gfx.ce_fw->data +
4244                 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4245        fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4246        WREG32(mmCP_CE_UCODE_ADDR, 0);
4247        for (i = 0; i < fw_size; i++)
4248                WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4249        WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4250
4251        /* ME */
4252        fw_data = (const __le32 *)
4253                (adev->gfx.me_fw->data +
4254                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4255        fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4256        WREG32(mmCP_ME_RAM_WADDR, 0);
4257        for (i = 0; i < fw_size; i++)
4258                WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4259        WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4260
4261        return 0;
4262}
4263
4264static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4265{
4266        u32 count = 0;
4267        const struct cs_section_def *sect = NULL;
4268        const struct cs_extent_def *ext = NULL;
4269
4270        /* begin clear state */
4271        count += 2;
4272        /* context control state */
4273        count += 3;
4274
4275        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4276                for (ext = sect->section; ext->extent != NULL; ++ext) {
4277                        if (sect->id == SECT_CONTEXT)
4278                                count += 2 + ext->reg_count;
4279                        else
4280                                return 0;
4281                }
4282        }
4283        /* pa_sc_raster_config/pa_sc_raster_config1 */
4284        count += 4;
4285        /* end clear state */
4286        count += 2;
4287        /* clear state */
4288        count += 2;
4289
4290        return count;
4291}
4292
/* Initialize the CP gfx engine and emit the clear-state PM4 stream on
 * gfx ring 0: preamble/clear-state begin, context control, all
 * SECT_CONTEXT extents from vi_cs_data, the per-ASIC
 * PA_SC_RASTER_CONFIG pair, clear-state end, CLEAR_STATE, and the CE
 * partition bases.  The packet sequence must match the size computed by
 * gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the SET_BASE packet appended after the CSB */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC raster configuration values; presumably derived from
	 * each chip's RB/SE layout — do not alter without hw docs */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the CP gfx ring-buffer doorbell: enable and point it at the
 * ring's doorbell index when the ring uses doorbells, otherwise disable
 * it.  On discrete GPUs the accepted doorbell aperture range is also
 * programmed; Topaz has no gfx doorbells at all.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* APUs don't program the doorbell range registers below */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4418
/* Bring up the CP gfx ring buffer (RB0): program size/block size,
 * reset the read/write pointers, set the rptr/wptr write-back
 * addresses, program the ring base, configure the doorbell, then start
 * the ring via gfx_v8_0_cp_gfx_start() and run a ring test.
 *
 * Returns 0 on success or the ring-test error (ring marked not ready).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers:
	 * temporarily allow rptr writes so wptr=0 takes effect */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without the rptr-write-enable bit */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4476
4477static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4478{
4479        int i;
4480
4481        if (enable) {
4482                WREG32(mmCP_MEC_CNTL, 0);
4483        } else {
4484                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4485                for (i = 0; i < adev->gfx.num_compute_rings; i++)
4486                        adev->gfx.compute_ring[i].ready = false;
4487                adev->gfx.kiq.ring.ready = false;
4488        }
4489        udelay(50);
4490}
4491
/* Upload the MEC (compute) microcode (legacy, non-SMU load path).
 * MEC1 is always loaded; MEC2 is only loaded when a separate mec2
 * firmware image exists.  The compute engines are halted first.
 *
 * Returns 0 on success, -EINVAL if the MEC1 image is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	/* writing the fw version marks the load complete */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4537
4538/* KIQ functions */
/* KIQ functions */
/* Tell the RLC which me/pipe/queue is the Kernel Interface Queue by
 * encoding them into the low byte of RLC_CP_SCHEDULERS.  The value is
 * written twice — first without and then with bit 0x80 set; the second
 * write presumably latches/activates the selection (two-step programming
 * requirement of the RLC — confirm against RLC docs).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4552
4553static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4554{
4555        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4556        uint32_t scratch, tmp = 0;
4557        uint64_t queue_mask = 0;
4558        int r, i;
4559
4560        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4561                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4562                        continue;
4563
4564                /* This situation may be hit in the future if a new HW
4565                 * generation exposes more than 64 queues. If so, the
4566                 * definition of queue_mask needs updating */
4567                if (WARN_ON(i > (sizeof(queue_mask)*8))) {
4568                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4569                        break;
4570                }
4571
4572                queue_mask |= (1ull << i);
4573        }
4574
4575        r = amdgpu_gfx_scratch_get(adev, &scratch);
4576        if (r) {
4577                DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4578                return r;
4579        }
4580        WREG32(scratch, 0xCAFEDEAD);
4581
4582        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4583        if (r) {
4584                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4585                amdgpu_gfx_scratch_free(adev, scratch);
4586                return r;
4587        }
4588        /* set resources */
4589        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4590        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4591        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4592        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4593        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4594        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4595        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4596        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4597        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4598                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4599                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4600                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4601
4602                /* map queues */
4603                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4604                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4605                amdgpu_ring_write(kiq_ring,
4606                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4607                amdgpu_ring_write(kiq_ring,
4608                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4609                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4610                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4611                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4612                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4613                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4614                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4615                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4616        }
4617        /* write to scratch for completion */
4618        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4619        amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4620        amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4621        amdgpu_ring_commit(kiq_ring);
4622
4623        for (i = 0; i < adev->usec_timeout; i++) {
4624                tmp = RREG32(scratch);
4625                if (tmp == 0xDEADBEEF)
4626                        break;
4627                DRM_UDELAY(1);
4628        }
4629        if (i >= adev->usec_timeout) {
4630                DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4631                          scratch, tmp);
4632                r = -EINVAL;
4633        }
4634        amdgpu_gfx_scratch_free(adev, scratch);
4635
4636        return r;
4637}
4638
4639static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4640{
4641        int i, r = 0;
4642
4643        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4644                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4645                for (i = 0; i < adev->usec_timeout; i++) {
4646                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4647                                break;
4648                        udelay(1);
4649                }
4650                if (i == adev->usec_timeout)
4651                        r = -ETIMEDOUT;
4652        }
4653        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4654        WREG32(mmCP_HQD_PQ_RPTR, 0);
4655        WREG32(mmCP_HQD_PQ_WPTR, 0);
4656
4657        return r;
4658}
4659
/* Populate the Memory Queue Descriptor (MQD) for a compute/KIQ ring in
 * CPU memory (ring->mqd_ptr).  Nothing is written to hardware here; the
 * MQD image is later consumed by the CP/KIQ (see gfx_v8_0_mqd_commit)
 * to program the hardware queue descriptor (HQD) registers.
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* all-ones static thread management masks — presumably enables
	 * all CUs on each shader engine; confirm against MEC docs */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	if (!(adev->flags & AMD_IS_APU)) {
		/* point the MQD at its own dynamic CU mask field
		 * ("dyamic" typo comes from the vi_structs.h field name) */
		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
					     + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
					     + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
	}
	/* EOP buffer base is programmed in units of 256 bytes */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the current hardware values for the
	 * remaining HQD registers */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4804
/* Write a prepared MQD image into the currently selected HQD's
 * registers.  The MQD fields are laid out to mirror the contiguous
 * register range starting at mmCP_MQD_BASE_ADDR, so each register is
 * programmed by indexing into the MQD with (reg - mmCP_MQD_BASE_ADDR).
 * mmCP_HQD_ACTIVE is written last, which activates the queue.
 *
 * Non-static: also used by amdkfd.  The caller must have selected the
 * target me/pipe/queue (srbm) before calling.  Returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4841
/* Initialize and commit the KIQ (Kernel Interface Queue) MQD.
 *
 * Fresh bring-up: zero the MQD, build it, commit it to the HQD registers
 * and save a backup copy.  GPU reset: restore the backup and re-commit so
 * the queue comes back with its prior configuration.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* KIQ's backup slot sits one past the compute-ring slots */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		/* select this queue's me/pipe/queue before touching HQD regs */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* NOTE(review): 0xFFFFFFFF presumably enables all CUs/RBs;
		 * "dyamic" matches the misspelled field names declared in
		 * vi_structs.h — verify against that header before renaming.
		 */
		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy for restore after GPU reset */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4880
/* Initialize a kernel compute queue (KCQ) MQD.
 *
 * Unlike the KIQ path, nothing is committed to the HQD registers here;
 * the queues are mapped later through the KIQ (gfx_v8_0_kiq_kcq_enable()).
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index == position within the compute_ring array */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		/* first bring-up: build the MQD from scratch and back it up */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* NOTE(review): "dyamic" matches the misspelled field names in
		 * vi_structs.h — verify against that header before renaming. */
		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume from suspend: MQD is kept, only scrub the ring */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4911
/* Program the MEC doorbell aperture (KIQ..MEC_RING7) and enable doorbells.
 *
 * The range programming is skipped on Tonga and older parts; note the
 * comparison relies on the asic_type enum ordering.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		/* doorbell indices are converted to byte offsets (<< 2) */
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4921
/* Bring up the KIQ and all kernel compute queues.
 *
 * For each ring the MQD BO is reserved and mapped only for the duration of
 * queue init, then unmapped again.  After the doorbell range is programmed
 * and the KCQs are enabled through the KIQ, every ring is ring-tested: a
 * KIQ test failure aborts, a KCQ test failure only clears that ring's
 * ready flag.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	/* KIQ first — the KCQs are enabled through it below */
	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	/* same reserve/kmap/init/unmap sequence for each compute queue */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4989
/* Load CP microcode (when needed) and resume the GFX and compute pipes.
 *
 * With powerplay disabled, firmware is either loaded directly by the
 * driver (legacy path) or verified as loaded by the SMU.  On the SMU path
 * Topaz is special-cased: the driver still loads the MEC microcode itself
 * instead of checking the SMU.  Returns 0 on success, negative on error.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* quiesce GUI idle interrupts while the CP is brought up */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just confirm each image */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				/* Topaz: driver loads MEC ucode directly */
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
5048
/* Enable or disable both CP pipes (GFX and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5054
/* hw_init IP callback: program golden registers and the base GPU config,
 * then bring up the RLC followed by the CP.  Returns 0 or a negative
 * error code from the resume steps.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the CP depends on a running RLC, so resume the RLC first */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5071
/* hw_fini IP callback: release IRQ references, then stop CP/RLC and
 * ungate GFX power gating.
 *
 * Under SR-IOV only the IRQ references are dropped; the remaining
 * teardown is skipped ("shouldn't do anything" for the VF).
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	/* leave the block ungated so a later hw_init starts from a
	 * known power state */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5090
5091static int gfx_v8_0_suspend(void *handle)
5092{
5093        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5094        adev->gfx.in_suspend = true;
5095        return gfx_v8_0_hw_fini(adev);
5096}
5097
5098static int gfx_v8_0_resume(void *handle)
5099{
5100        int r;
5101        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5102
5103        r = gfx_v8_0_hw_init(adev);
5104        adev->gfx.in_suspend = false;
5105        return r;
5106}
5107
5108static bool gfx_v8_0_is_idle(void *handle)
5109{
5110        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5111
5112        if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5113                return false;
5114        else
5115                return true;
5116}
5117
5118static int gfx_v8_0_wait_for_idle(void *handle)
5119{
5120        unsigned i;
5121        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5122
5123        for (i = 0; i < adev->usec_timeout; i++) {
5124                if (gfx_v8_0_is_idle(handle))
5125                        return 0;
5126
5127                udelay(1);
5128        }
5129        return -ETIMEDOUT;
5130}
5131
/* check_soft_reset IP callback: inspect GRBM/SRBM status and build the
 * soft-reset masks.
 *
 * The computed masks are cached in adev->gfx.{grbm,srbm}_soft_reset for
 * the pre/soft/post reset stages.  Returns true when any reset is needed.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy GFX pipeline stage implies a CP+GFX reset plus a GRBM
	 * reset through SRBM */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP frontend (fetcher/compute/gfx) resets all three
	 * CP sub-blocks together */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5193
5194static int gfx_v8_0_pre_soft_reset(void *handle)
5195{
5196        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5197        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5198
5199        if ((!adev->gfx.grbm_soft_reset) &&
5200            (!adev->gfx.srbm_soft_reset))
5201                return 0;
5202
5203        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5204        srbm_soft_reset = adev->gfx.srbm_soft_reset;
5205
5206        /* stop the rlc */
5207        gfx_v8_0_rlc_stop(adev);
5208
5209        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5210            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5211                /* Disable GFX parsing/prefetching */
5212                gfx_v8_0_cp_gfx_enable(adev, false);
5213
5214        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5215            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5216            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5217            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5218                int i;
5219
5220                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5221                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5222
5223                        mutex_lock(&adev->srbm_mutex);
5224                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5225                        gfx_v8_0_deactivate_hqd(adev, 2);
5226                        vi_srbm_select(adev, 0, 0, 0, 0);
5227                        mutex_unlock(&adev->srbm_mutex);
5228                }
5229                /* Disable MEC parsing/prefetching */
5230                gfx_v8_0_cp_compute_enable(adev, false);
5231        }
5232
5233       return 0;
5234}
5235
/* soft_reset IP callback: pulse the soft-reset bits computed by
 * gfx_v8_0_check_soft_reset().
 *
 * The sequence brackets the reset with GMCON_DEBUG GFX_STALL/GFX_CLEAR,
 * asserts each reset mask, holds it ~50us and deasserts it again; each
 * write is followed by a read-back of the same register (posting read).
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall/clear the GFX memory controller interface for the
	 * duration of the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5297
/* post_soft_reset IP callback: bring the reset blocks back up.
 *
 * Mirrors pre_soft_reset: resumes the GFX CP when CP/GFX were reset,
 * and for CP sub-block resets deactivates any stale compute HQDs and
 * re-runs the KIQ resume path before restarting the RLC.
 * Always returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure no HQD is still marked active after the reset */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5335
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* serialize the capture/read sequence against other readers */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	/* latch the running counter into the LSB/MSB register pair */
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5355
/* Emit WRITE_DATA packets programming a VMID's GDS/GWS/OA allocations.
 *
 * Base/size arguments arrive in bytes (or raw units) and are converted to
 * hardware granularity with the AMDGPU_*_SHIFT constants; each value is
 * then written to that VMID's slot in the per-VMID GDS register table.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: base and size share one register (size in the SIZE field) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: encoded as a contiguous bitmask of oa_size bits from oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5403
/* Read one wave-context register through the SQ indirect index/data pair.
 * FORCE_READ makes the SQ return the value regardless of wave state.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5413
5414static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5415                           uint32_t wave, uint32_t thread,
5416                           uint32_t regno, uint32_t num, uint32_t *out)
5417{
5418        WREG32(mmSQ_IND_INDEX,
5419                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5420                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5421                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5422                (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5423                (SQ_IND_INDEX__FORCE_READ_MASK) |
5424                (SQ_IND_INDEX__AUTO_INCR_MASK));
5425        while (num--)
5426                *(out++) = RREG32(mmSQ_IND_DATA);
5427}
5428
5429static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5430{
5431        /* type 0 wave data */
5432        dst[(*no_fields)++] = 0;
5433        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5434        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5435        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5436        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5437        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5438        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5439        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5440        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5441        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5442        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5443        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5444        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5445        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5446        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5447        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5448        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5449        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5450        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5451}
5452
/* Read @size consecutive SGPRs of a wave, starting at @start, into @dst.
 * SGPRs live at SQIND_WAVE_SGPRS_OFFSET in the SQ indirect register space;
 * thread id 0 is used since SGPRs are shared across the wave.
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5461
5462
/* GFX helper callbacks exposed through adev->gfx.funcs
 * (installed in gfx_v8_0_early_init()). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5469
/* early_init IP callback: set ring counts and install the static
 * function/irq/gds/rlc tables before sw_init runs.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5484
/* late_init IP callback: take the privileged-register/instruction IRQ
 * references, run the EDC GPR workarounds (which need the IB pool) and
 * gate GFX power gating.  Returns 0 on success, negative on error.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5508
/* Enable/disable static per-CU power gating (RLC_PG_CNTL).
 * Polaris11/12 additionally require the policy to be forwarded to the
 * SMU through powerplay before the RLC bit is flipped.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5522
/* Enable/disable dynamic per-CU power gating (RLC_PG_CNTL). */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5528
/* Enable/disable quick power gating (RLC_PG_CNTL QUICK_PG_ENABLE). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5534
/* Toggle coarse-grain GFX power gating (RLC_PG_CNTL). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5540
/* Toggle GFX pipeline power gating; on disable, a dummy register read
 * wakes the GFX block back up.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5550
/* Apply the coarse-grain GFX power gating policy.
 *
 * When gating is requested and supported, CG power gating is enabled and
 * pipeline power gating is added only if the PIPELINE flag is also set.
 * Otherwise both are unconditionally disabled.
 */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
5563
5564static int gfx_v8_0_set_powergating_state(void *handle,
5565                                          enum amd_powergating_state state)
5566{
5567        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5568        bool enable = (state == AMD_PG_STATE_GATE);
5569
5570        if (amdgpu_sriov_vf(adev))
5571                return 0;
5572
5573        switch (adev->asic_type) {
5574        case CHIP_CARRIZO:
5575        case CHIP_STONEY:
5576
5577                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5578                        cz_enable_sck_slow_down_on_power_up(adev, true);
5579                        cz_enable_sck_slow_down_on_power_down(adev, true);
5580                } else {
5581                        cz_enable_sck_slow_down_on_power_up(adev, false);
5582                        cz_enable_sck_slow_down_on_power_down(adev, false);
5583                }
5584                if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5585                        cz_enable_cp_power_gating(adev, true);
5586                else
5587                        cz_enable_cp_power_gating(adev, false);
5588
5589                cz_update_gfx_cg_power_gating(adev, enable);
5590
5591                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5592                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5593                else
5594                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5595
5596                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5597                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5598                else
5599                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5600                break;
5601        case CHIP_POLARIS11:
5602        case CHIP_POLARIS12:
5603                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5604                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5605                else
5606                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5607
5608                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5609                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5610                else
5611                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5612
5613                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5614                        polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5615                else
5616                        polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5617                break;
5618        default:
5619                break;
5620        }
5621
5622        return 0;
5623}
5624
/*
 * gfx_v8_0_get_clockgating_state - report which GFX CG features are live
 * @handle: amdgpu_device pointer (as void *)
 * @flags: AMD_CG_SUPPORT_* bits OR'ed in for each feature found enabled
 *
 * Derives the active clockgating state from hardware: override-style
 * features (MGCG, CGTS, CGTS_LS) count as enabled when their override bit
 * is CLEAR, enable-style features (CGCG, CGLS, RLC/CP light sleep) when
 * their enable bit is SET.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): no early return for VFs — *flags is zeroed but the
	 * register reads below still execute; confirm this is intended. */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5666
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC serdes
 * @adev: amdgpu device pointer
 * @reg_addr: target BPM register (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd: serdes command (SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Selects all SEs/SHs and sets both CU and non-CU master masks to
 * all-ones so the write reaches every BPM instance, then programs
 * RLC_SERDES_WR_CTRL with the command and register address.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* NOTE(review): Stoney does not clear BPM_DATA/REG_ADDR before the
	 * OR below, so stale bits in those fields would survive — confirm
	 * this asymmetry vs. other ASICs is intended. */
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* encode command + register address, broadcast to all BPM units */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5707
/*
 * RLC safe-mode messages and the GPR_REG2 request/message field layout.
 * NOTE(review): the iceland_{enter,exit}_rlc_safe_mode helpers below
 * drive safe mode through mmRLC_SAFE_MODE instead; these definitions
 * appear unused in this file — confirm before removing.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5714
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode before CG/PG changes
 * @adev: amdgpu device pointer
 *
 * No-op when the RLC F32 core is not running or when neither CGCG nor
 * MGCG is enabled in cg_flags. Otherwise writes CMD + MESSAGE=1 to
 * mmRLC_SAFE_MODE, waits for GFX to report both clock and power status
 * in RLC_GPM_STAT, then waits for the RLC to acknowledge by clearing
 * the CMD bit, and records the result in adev->gfx.rlc.in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* request safe mode: CMD set, MESSAGE = 1 */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX to be both clocked and powered */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request (CMD cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5748
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode
 * @adev: amdgpu device pointer
 *
 * Counterpart of iceland_enter_rlc_safe_mode(): no-op when the RLC F32
 * core is not running. When safe mode was entered, writes CMD with a
 * zero MESSAGE field (exit request) and clears in_safe_mode, then waits
 * for the RLC to acknowledge by clearing the CMD bit.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* exit request: CMD set, MESSAGE = 0 */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack (CMD cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5773
/* RLC safe-mode hooks backed by the iceland_* helpers above */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5778
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to engage medium grain clockgating (gated on cg_flags)
 *
 * Runs entirely under RLC safe mode. The numbered comments mark the
 * hardware programming sequence: memory light sleep, the override bits
 * in RLC_CGTT_MGCG_OVERRIDE, then a serdes broadcast to apply the
 * override in every BPM. The disable path performs the inverse steps.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; only dGPUs clear it */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5882
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to engage coarse grain clockgating (gated on cg_flags)
 *
 * Runs under RLC safe mode. Enable clears the CGCG (and optionally CGLS)
 * override bits, broadcasts the change via serdes, then sets the enable
 * bits in RLC_CGCG_CGLS_CTRL and re-arms the GUI idle interrupts the RLC
 * relies on. Disable performs the inverse, waking CGCG first with dummy
 * register reads.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5975static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5976                                            bool enable)
5977{
5978        if (enable) {
5979                /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5980                 * ===  MGCG + MGLS + TS(CG/LS) ===
5981                 */
5982                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5983                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5984        } else {
5985                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5986                 * ===  CGCG + CGLS ===
5987                 */
5988                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5989                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5990        }
5991        return 0;
5992}
5993
5994static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5995                                          enum amd_clockgating_state state)
5996{
5997        uint32_t msg_id, pp_state = 0;
5998        uint32_t pp_support_state = 0;
5999        void *pp_handle = adev->powerplay.pp_handle;
6000
6001        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6002                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6003                        pp_support_state = PP_STATE_SUPPORT_LS;
6004                        pp_state = PP_STATE_LS;
6005                }
6006                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6007                        pp_support_state |= PP_STATE_SUPPORT_CG;
6008                        pp_state |= PP_STATE_CG;
6009                }
6010                if (state == AMD_CG_STATE_UNGATE)
6011                        pp_state = 0;
6012
6013                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6014                                PP_BLOCK_GFX_CG,
6015                                pp_support_state,
6016                                pp_state);
6017                amd_set_clockgating_by_smu(pp_handle, msg_id);
6018        }
6019
6020        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6021                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6022                        pp_support_state = PP_STATE_SUPPORT_LS;
6023                        pp_state = PP_STATE_LS;
6024                }
6025
6026                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6027                        pp_support_state |= PP_STATE_SUPPORT_CG;
6028                        pp_state |= PP_STATE_CG;
6029                }
6030
6031                if (state == AMD_CG_STATE_UNGATE)
6032                        pp_state = 0;
6033
6034                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6035                                PP_BLOCK_GFX_MG,
6036                                pp_support_state,
6037                                pp_state);
6038                amd_set_clockgating_by_smu(pp_handle, msg_id);
6039        }
6040
6041        return 0;
6042}
6043
6044static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6045                                          enum amd_clockgating_state state)
6046{
6047
6048        uint32_t msg_id, pp_state = 0;
6049        uint32_t pp_support_state = 0;
6050        void *pp_handle = adev->powerplay.pp_handle;
6051
6052        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6053                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6054                        pp_support_state = PP_STATE_SUPPORT_LS;
6055                        pp_state = PP_STATE_LS;
6056                }
6057                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6058                        pp_support_state |= PP_STATE_SUPPORT_CG;
6059                        pp_state |= PP_STATE_CG;
6060                }
6061                if (state == AMD_CG_STATE_UNGATE)
6062                        pp_state = 0;
6063
6064                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6065                                PP_BLOCK_GFX_CG,
6066                                pp_support_state,
6067                                pp_state);
6068                amd_set_clockgating_by_smu(pp_handle, msg_id);
6069        }
6070
6071        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6072                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6073                        pp_support_state = PP_STATE_SUPPORT_LS;
6074                        pp_state = PP_STATE_LS;
6075                }
6076                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6077                        pp_support_state |= PP_STATE_SUPPORT_CG;
6078                        pp_state |= PP_STATE_CG;
6079                }
6080                if (state == AMD_CG_STATE_UNGATE)
6081                        pp_state = 0;
6082
6083                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6084                                PP_BLOCK_GFX_3D,
6085                                pp_support_state,
6086                                pp_state);
6087                amd_set_clockgating_by_smu(pp_handle, msg_id);
6088        }
6089
6090        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6091                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6092                        pp_support_state = PP_STATE_SUPPORT_LS;
6093                        pp_state = PP_STATE_LS;
6094                }
6095
6096                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6097                        pp_support_state |= PP_STATE_SUPPORT_CG;
6098                        pp_state |= PP_STATE_CG;
6099                }
6100
6101                if (state == AMD_CG_STATE_UNGATE)
6102                        pp_state = 0;
6103
6104                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6105                                PP_BLOCK_GFX_MG,
6106                                pp_support_state,
6107                                pp_state);
6108                amd_set_clockgating_by_smu(pp_handle, msg_id);
6109        }
6110
6111        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6112                pp_support_state = PP_STATE_SUPPORT_LS;
6113
6114                if (state == AMD_CG_STATE_UNGATE)
6115                        pp_state = 0;
6116                else
6117                        pp_state = PP_STATE_LS;
6118
6119                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6120                                PP_BLOCK_GFX_RLC,
6121                                pp_support_state,
6122                                pp_state);
6123                amd_set_clockgating_by_smu(pp_handle, msg_id);
6124        }
6125
6126        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6127                pp_support_state = PP_STATE_SUPPORT_LS;
6128
6129                if (state == AMD_CG_STATE_UNGATE)
6130                        pp_state = 0;
6131                else
6132                        pp_state = PP_STATE_LS;
6133                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6134                        PP_BLOCK_GFX_CP,
6135                        pp_support_state,
6136                        pp_state);
6137                amd_set_clockgating_by_smu(pp_handle, msg_id);
6138        }
6139
6140        return 0;
6141}
6142
6143static int gfx_v8_0_set_clockgating_state(void *handle,
6144                                          enum amd_clockgating_state state)
6145{
6146        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6147
6148        if (amdgpu_sriov_vf(adev))
6149                return 0;
6150
6151        switch (adev->asic_type) {
6152        case CHIP_FIJI:
6153        case CHIP_CARRIZO:
6154        case CHIP_STONEY:
6155                gfx_v8_0_update_gfx_clock_gating(adev,
6156                                                 state == AMD_CG_STATE_GATE);
6157                break;
6158        case CHIP_TONGA:
6159                gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6160                break;
6161        case CHIP_POLARIS10:
6162        case CHIP_POLARIS11:
6163        case CHIP_POLARIS12:
6164                gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6165                break;
6166        default:
6167                break;
6168        }
6169        return 0;
6170}
6171
6172static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6173{
6174        return ring->adev->wb.wb[ring->rptr_offs];
6175}
6176
6177static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6178{
6179        struct amdgpu_device *adev = ring->adev;
6180
6181        if (ring->use_doorbell)
6182                /* XXX check if swapping is necessary on BE */
6183                return ring->adev->wb.wb[ring->wptr_offs];
6184        else
6185                return RREG32(mmCP_RB0_WPTR);
6186}
6187
6188static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6189{
6190        struct amdgpu_device *adev = ring->adev;
6191
6192        if (ring->use_doorbell) {
6193                /* XXX check if swapping is necessary on BE */
6194                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6195                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6196        } else {
6197                WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6198                (void)RREG32(mmCP_RB0_WPTR);
6199        }
6200}
6201
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit a PM4 sequence that flushes HDP
 * @ring: ring to emit on
 *
 * Picks the GPU_HDP_FLUSH_REQ/DONE bit belonging to this queue: CP0 for
 * the GFX ring; for compute/KIQ queues the ME selects a base bit (CP2
 * for ME1, CP6 for ME2) which is shifted by the pipe index. The
 * WAIT_REG_MEM packet in "write, wait, write" mode writes the mask to
 * the REQ register and polls the DONE register until the bit matches.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no flush bit defined for other MEs */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6234
/*
 * gfx_v8_0_ring_emit_vgt_flush - flush the VGT
 * @ring: ring to emit on
 *
 * Emits a VS_PARTIAL_FLUSH event followed by a VGT_FLUSH event.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6245
6246
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - invalidate the HDP cache from the CP
 * @ring: ring to emit on
 *
 * Emits a WRITE_DATA packet that writes 1 to mmHDP_DEBUG0, which the
 * driver uses as the HDP invalidate trigger.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6258
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 * @ring: ring to emit on
 * @ib: indirect buffer to run
 * @vm_id: VMID the IB executes under (bits 24+ of the CONTROL dword)
 * @ctx_switch: unused here
 *
 * CE IBs use INDIRECT_BUFFER_CONST, DE IBs the plain INDIRECT_BUFFER
 * opcode. Under SR-IOV, preemptible IBs are marked PRE_ENB and DE IBs
 * additionally get their de-meta data emitted first.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6288
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a compute ring
 * @ring: ring to emit on
 * @ib: indirect buffer to run
 * @vm_id: VMID the IB executes under (bits 24+ of the CONTROL dword)
 * @ctx_switch: unused here
 *
 * Simpler than the gfx variant: always a plain INDIRECT_BUFFER with the
 * VALID bit set; no CE/preemption handling.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6304
/* Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush TC/TCL1 caches,
 * then write @seq to @addr and optionally raise an interrupt.
 * AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write (DATA_SEL 2 vs 1),
 * AMDGPU_FENCE_FLAG_INT requests the EOP interrupt (INT_SEL 2).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6325
/* Emit a WAIT_REG_MEM that blocks the ring until the fence memory at
 * fence_drv.gpu_addr equals the latest synced sequence number.
 * On the gfx ring the wait runs on the PFP; compute rings use the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6342
/* Flush the GPU TLB for @vm_id on behalf of this ring:
 * 1) program the per-VMID page table base register with @pd_addr,
 * 2) request a TLB invalidate for that VMID via VM_INVALIDATE_REQUEST,
 * 3) wait for the request register to read back zero,
 * 4) on gfx rings, sync the PFP to the ME so later PFP fetches see the
 *    updated state.
 * VMIDs 0-7 and 8-15 live in two separate register banks.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* base is stored as a page frame number */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6389
6390static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6391{
6392        return ring->adev->wb.wb[ring->wptr_offs];
6393}
6394
/* Commit the compute ring write pointer: mirror it into the writeback
 * slot and ring the doorbell so the CP picks it up.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6403
/* Emit a fence on a compute ring using RELEASE_MEM (the MEC counterpart
 * of the gfx EVENT_WRITE_EOP): flush TC/TCL1 caches, write @seq to @addr
 * and optionally raise an interrupt, depending on @flags.
 * Note the dword layout differs from the gfx fence: the DATA_SEL/INT_SEL
 * control word precedes the address here.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6424
/* Emit a fence on the KIQ ring: write the 32-bit @seq to @addr with a
 * WRITE_DATA to memory (DST_SEL 5), then, if requested, poke
 * mmCPC_INT_STATUS to raise the GENERIC2 interrupt.
 * Only 32-bit sequence writeback is supported here.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6449
/* Emit a SWITCH_BUFFER packet (2 dwords) on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6455
/* Emit a CONTEXT_CONTROL packet selecting which state blocks the CP loads.
 * The dw2 bit masks below choose load_enable plus the per-category load
 * bits; CE RAM is loaded either on a real context switch with a preamble,
 * or the first time a preamble is seen without a switch.
 * Under SR-IOV the CE meta data is emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6488
/* Emit a COND_EXEC packet whose skip-count dword is patched later by
 * gfx_v8_0_ring_emit_patch_cond_exec().  Returns the ring offset (in
 * dwords, masked to the ring size) of the 0x55aa55aa placeholder.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6501
/* Patch the COND_EXEC placeholder at @offset with the number of dwords to
 * skip, i.e. the distance from the placeholder to the current wptr.  The
 * else branch handles the case where the ring has wrapped since the
 * placeholder was emitted.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wrapped: add the ring size (in dwords) back in */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6515
/* Emit a COPY_DATA packet that copies register @reg into the writeback
 * buffer slot at virt.reg_val_offs, where the CPU can read it back
 * (used for register reads under SR-IOV via the KIQ).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6531
/* Emit a WRITE_DATA packet that writes @val into register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6541
6542static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6543                                                 enum amdgpu_interrupt_state state)
6544{
6545        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6546                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6547}
6548
6549static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6550                                                     int me, int pipe,
6551                                                     enum amdgpu_interrupt_state state)
6552{
6553        u32 mec_int_cntl, mec_int_cntl_reg;
6554
6555        /*
6556         * amdgpu controls only the first MEC. That's why this function only
6557         * handles the setting of interrupts for this specific MEC. All other
6558         * pipes' interrupts are set by amdkfd.
6559         */
6560
6561        if (me == 1) {
6562                switch (pipe) {
6563                case 0:
6564                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6565                        break;
6566                case 1:
6567                        mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6568                        break;
6569                case 2:
6570                        mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6571                        break;
6572                case 3:
6573                        mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6574                        break;
6575                default:
6576                        DRM_DEBUG("invalid pipe %d\n", pipe);
6577                        return;
6578                }
6579        } else {
6580                DRM_DEBUG("invalid me %d\n", me);
6581                return;
6582        }
6583
6584        switch (state) {
6585        case AMDGPU_IRQ_STATE_DISABLE:
6586                mec_int_cntl = RREG32(mec_int_cntl_reg);
6587                mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6588                WREG32(mec_int_cntl_reg, mec_int_cntl);
6589                break;
6590        case AMDGPU_IRQ_STATE_ENABLE:
6591                mec_int_cntl = RREG32(mec_int_cntl_reg);
6592                mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6593                WREG32(mec_int_cntl_reg, mec_int_cntl);
6594                break;
6595        default:
6596                break;
6597        }
6598}
6599
6600static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6601                                             struct amdgpu_irq_src *source,
6602                                             unsigned type,
6603                                             enum amdgpu_interrupt_state state)
6604{
6605        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6606                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6607
6608        return 0;
6609}
6610
6611static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6612                                              struct amdgpu_irq_src *source,
6613                                              unsigned type,
6614                                              enum amdgpu_interrupt_state state)
6615{
6616        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6617                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6618
6619        return 0;
6620}
6621
/* Dispatch an EOP interrupt enable/disable request to the right engine:
 * the gfx ring, or one of the MEC1/MEC2 pipes (note only MEC1 requests
 * actually take effect, see gfx_v8_0_set_compute_eop_interrupt_state).
 * Unknown types are silently ignored; always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6660
6661static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6662                            struct amdgpu_irq_src *source,
6663                            struct amdgpu_iv_entry *entry)
6664{
6665        int i;
6666        u8 me_id, pipe_id, queue_id;
6667        struct amdgpu_ring *ring;
6668
6669        DRM_DEBUG("IH: CP EOP\n");
6670        me_id = (entry->ring_id & 0x0c) >> 2;
6671        pipe_id = (entry->ring_id & 0x03) >> 0;
6672        queue_id = (entry->ring_id & 0x70) >> 4;
6673
6674        switch (me_id) {
6675        case 0:
6676                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6677                break;
6678        case 1:
6679        case 2:
6680                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6681                        ring = &adev->gfx.compute_ring[i];
6682                        /* Per-queue interrupt is supported for MEC starting from VI.
6683                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6684                          */
6685                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6686                                amdgpu_fence_process(ring);
6687                }
6688                break;
6689        }
6690        return 0;
6691}
6692
/* Privileged register fault handler: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6701
/* Privileged instruction fault handler: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6710
/* Enable/disable the GENERIC2 interrupt used by the KIQ: toggles the bit
 * both in CPC_INT_CNTL and in the per-pipe INT_CNTL register of the
 * ME/pipe the KIQ ring lives on. Only GENERIC2 is supported; anything
 * else is a driver bug.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6739
6740static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6741                            struct amdgpu_irq_src *source,
6742                            struct amdgpu_iv_entry *entry)
6743{
6744        u8 me_id, pipe_id, queue_id;
6745        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6746
6747        me_id = (entry->ring_id & 0x0c) >> 2;
6748        pipe_id = (entry->ring_id & 0x03) >> 0;
6749        queue_id = (entry->ring_id & 0x70) >> 4;
6750        DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6751                   me_id, pipe_id, queue_id);
6752
6753        amdgpu_fence_process(ring);
6754        return 0;
6755}
6756
/* IP-level lifecycle callbacks for the GFX 8.x block. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6777
/* Ring callbacks for the gfx (GFX engine) ring.
 * emit_frame_size is the worst-case dword count a single frame may emit;
 * it must stay in sync with the emit_* implementations above.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6821
/* Ring callbacks for the MEC compute rings (doorbell-driven wptr,
 * RELEASE_MEM fences, no CE/cond-exec support).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6850
/* Ring callbacks for the kernel interface queue (KIQ): shares the compute
 * wptr handling, uses the KIQ-specific fence, and is the only ring that
 * supports register read/write emission (for SR-IOV access).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6876
6877static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6878{
6879        int i;
6880
6881        adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6882
6883        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6884                adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6885
6886        for (i = 0; i < adev->gfx.num_compute_rings; i++)
6887                adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6888}
6889
/* IRQ source callbacks for CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6894
/* IRQ source callbacks for privileged register access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6899
/* IRQ source callbacks for privileged instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6904
/* IRQ source callbacks for the KIQ GENERIC2 interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
6909
/* Register all GFX interrupt sources (EOP, priv reg/inst faults, KIQ)
 * with their callback tables and type counts.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
6924
/* Install the RLC callbacks; iceland_rlc_funcs is shared by all the
 * VI parts this file handles (presumably named after the first ASIC it
 * was written for).
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6929
/* Initialize the ASIC GDS (global data share) info: total sizes are read
 * from hardware (GDS memory) or fixed (GWS/OA), and the gfx/CS partition
 * sizes depend on whether the part exposes 64KB of GDS memory.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6957
6958static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6959                                                 u32 bitmap)
6960{
6961        u32 data;
6962
6963        if (!bitmap)
6964                return;
6965
6966        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6967        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6968
6969        WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6970}
6971
6972static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6973{
6974        u32 data, mask;
6975
6976        data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6977                RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6978
6979        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6980
6981        return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6982}
6983
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the always-on ("ao") CU bitmaps/mask.
 * Walks every SE/SH under grbm_idx_mutex, applies any user disable masks
 * (only defined for the first 4 SEs x 2 SHs), and restores broadcast
 * SE/SH selection before returning.
 * NOTE(review): ao_cu_mask only packs SEs 0-1 / SHs 0-1 (8 bits each at
 * i*16 + j*8), while ao_cu_bitmap covers all of them — the packed-mask
 * layout apparently only has room for 4 SE/SH pairs.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2 */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
7034
/* GFX 8.0 IP block descriptor, registered by the SoC setup code (vi.c) */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7043
/* GFX 8.1 IP block descriptor; shares the 8.0 callbacks, differs only in minor version */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7052
7053static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7054{
7055        uint64_t ce_payload_addr;
7056        int cnt_ce;
7057        static union {
7058                struct vi_ce_ib_state regular;
7059                struct vi_ce_ib_state_chained_ib chained;
7060        } ce_payload = {};
7061
7062        if (ring->adev->virt.chained_ib_support) {
7063                ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7064                                                  offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7065                cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7066        } else {
7067                ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7068                                                  offsetof(struct vi_gfx_meta_data, ce_payload);
7069                cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7070        }
7071
7072        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7073        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7074                                WRITE_DATA_DST_SEL(8) |
7075                                WR_CONFIRM) |
7076                                WRITE_DATA_CACHE_POLICY(0));
7077        amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7078        amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7079        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7080}
7081
7082static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7083{
7084        uint64_t de_payload_addr, gds_addr, csa_addr;
7085        int cnt_de;
7086        static union {
7087                struct vi_de_ib_state regular;
7088                struct vi_de_ib_state_chained_ib chained;
7089        } de_payload = {};
7090
7091        csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7092        gds_addr = csa_addr + 4096;
7093        if (ring->adev->virt.chained_ib_support) {
7094                de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7095                de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7096                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7097                cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7098        } else {
7099                de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7100                de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7101                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7102                cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7103        }
7104
7105        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7106        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7107                                WRITE_DATA_DST_SEL(8) |
7108                                WR_CONFIRM) |
7109                                WRITE_DATA_CACHE_POLICY(0));
7110        amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7111        amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7112        amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7113}
7114