/* linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

/* Auto-generated register offset / bitfield headers per IP block. */
#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_enum.h"	/* NOTE(review): duplicate of the include two lines up; harmless (guarded) but removable */

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"
/* Number of graphics (GFX) rings exposed by this IP block. */
#define GFX8_NUM_GFX_RINGS     1
/* Bytes reserved per compute queue for the MEC HPD buffer
 * (NOTE(review): exact HPD layout defined by MEC firmware - confirm). */
#define GFX8_MEC_HPD_SIZE 2048

/* Recommended ("golden") GB_ADDR_CONFIG value per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Field-placement helpers for GB_TILE_MODE* / GB_MACROTILE_MODE*
 * registers: each shifts a raw value into its named bitfield.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  71
/* Per-feature override bits in RLC_CGTT_MGCG_OVERRIDE (one bit each). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set (1) or clear (0) the addressed BPM register bit. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* sentinel: number of BPM registers */
};

/* RLC firmware register-list format identifier (direct register list). */
#define RLC_FormatDirectRegListLength        14
  94
/*
 * Firmware images required per gfx8 ASIC: CE, PFP, ME, MEC (plus MEC2
 * where the part has a second MEC), and RLC.  The polaris "_2" files are
 * alternate firmware revisions - NOTE(review): which of the two gets
 * loaded is decided elsewhere in this file; confirm the selection logic.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 170
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15).  Each row holds
 * the {base, size, GWS, OA} register offsets for that VMID, in the field
 * order of struct amdgpu_gds_reg_offset.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
 190
/*
 * Golden register settings for Tonga (rev a11).
 * Flat list of {register offset, AND mask, OR value} triples; presumably
 * applied via the amdgpu golden-register programming helper - confirm at
 * the call site in this file's init code.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
 210
/*
 * Common golden registers for all Tonga variants:
 * {register offset, AND mask, OR value} triples.  The leading
 * GRBM_GFX_INDEX write (0xe0000000) broadcasts to all SEs/SHs/instances.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
 222
/*
 * MGCG/CGCG (clock gating) init sequence for Tonga:
 * {register offset, AND mask, OR value} triples.  Sequence order matters:
 * GRBM_GFX_INDEX is written (broadcast, 0xe0000000) before the global
 * CGTT writes and again before the per-CU CGTS writes - do not reorder.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	/* re-broadcast before the per-CU CGTS programming below */
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
 301
/*
 * Golden register settings for VegaM (rev a11):
 * {register offset, AND mask, OR value} triples.
 */
static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
 322
/*
 * Common golden registers for VegaM:
 * {register offset, AND mask, OR value} triples, written with
 * GRBM_GFX_INDEX in broadcast mode (0xe0000000).
 */
static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
 332
/*
 * Golden register settings for Polaris11 (rev a11):
 * {register offset, AND mask, OR value} triples.
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
 353
/*
 * Common golden registers for Polaris11:
 * {register offset, AND mask, OR value} triples; GB_ADDR_CONFIG matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
 363
/*
 * Golden register settings for Polaris10 (rev a11):
 * {register offset, AND mask, OR value} triples.
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
 384
/*
 * Common golden registers for Polaris10:
 * {register offset, AND mask, OR value} triples, written with
 * GRBM_GFX_INDEX in broadcast mode (0xe0000000).
 */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
 396
/*
 * Common golden registers for Fiji:
 * {register offset, AND mask, OR value} triples.  GRBM_GFX_INDEX is
 * broadcast (0xe0000000) at the start and re-asserted before the final
 * SPI_CONFIG_CNTL_1 write - keep the ordering.
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
 410
/*
 * Golden register settings for Fiji (rev a10):
 * {register offset, AND mask, OR value} triples.
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
 425
/*
 * MGCG/CGCG (clock gating) init sequence for Fiji:
 * {register offset, AND mask, OR value} triples.  Unlike the Tonga table
 * there are no per-CU CGTS entries.  GRBM_GFX_INDEX is broadcast
 * (0xe0000000) before each group of writes - keep the ordering.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
 464
/*
 * Golden register settings for Iceland/Topaz (rev a11):
 * {register offset, AND mask, OR value} triples.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
 484
/*
 * Common golden registers for Iceland/Topaz:
 * {register offset, AND mask, OR value} triples; GB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
 496
/*
 * MGCG/CGCG (clock gating) init sequence for Iceland/Topaz:
 * {register offset, AND mask, OR value} triples.  Covers CU0-CU5 only
 * (smaller part), with different CP/TCI CGTT values than Tonga.
 * GRBM_GFX_INDEX is broadcast (0xe0000000) before each group of writes -
 * do not reorder entries.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	/* re-broadcast before the per-CU CGTS programming below */
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
 564
/*
 * Golden register settings for Carrizo (rev a11):
 * {register offset, AND mask, OR value} triples.
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
 580
/*
 * Common golden registers for Carrizo:
 * {register offset, AND mask, OR value} triples; GB_ADDR_CONFIG matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
 592
 593static const u32 cz_mgcg_cgcg_init[] =
 594{
 595        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 596        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 597        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 598        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 599        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 600        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 601        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 602        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 603        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 604        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 605        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 606        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 607        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 608        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 609        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 610        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 611        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 612        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 613        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 614        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 615        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 616        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 617        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 618        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 619        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 620        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 621        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 622        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 623        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 624        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 625        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 626        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 627        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 628        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 629        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 630        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 631        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 632        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 633        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 634        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 635        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 636        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 637        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 638        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 639        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 640        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 641        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 642        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 643        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 644        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 645        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 646        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 647        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 648        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 649        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 650        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 651        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 652        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 653        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 654        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 655        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 656        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 657        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 658        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 659        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 660        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 661        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 662        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 663        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 664        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 665        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 666        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 667        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 668        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 669        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 670};
 671
/*
 * Stoney "golden" register overrides, applied at init by
 * gfx_v8_0_init_golden_registers().  Entries are {register, mask, value}
 * triplets consumed by amdgpu_device_program_register_sequence(); see that
 * helper for the exact read-modify-write semantics of the mask.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
 685
/*
 * Stoney common golden settings: raster config, address config and SPI
 * CU resource reservations.  {register, mask, value} triplets for
 * amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
 697
/*
 * Stoney clock-gating init values (MGCG/CGCG — presumably medium- and
 * coarse-grain clock gating; exact gating semantics live in the RLC/CGTS
 * hardware docs).  {register, mask, value} triplets for
 * amdgpu_device_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
 706
 707static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 708static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 709static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 710static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 711static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 712static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 713static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 714static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 715
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the clock-gating init table, the golden settings table and the
 * common table (in that order, where the ASIC has all three) via
 * amdgpu_device_program_register_sequence().  Unknown ASICs are silently
 * left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific I2C writes for a few Polaris10 rev 0xc7 SKUs,
		 * matched by subsystem vendor/device ID.  NOTE(review): the
		 * purpose of the 0x96/0x1E/0x1F transactions is not documented
		 * here — presumably an external-chip tweak; confirm against the
		 * board vendor errata before touching.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
 812
 813static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 814{
 815        adev->gfx.scratch.num_reg = 8;
 816        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 817        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 818}
 819
/*
 * gfx_v8_0_ring_test_ring - basic sanity test of a GFX/compute ring
 * @ring: the ring to exercise
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a SET_UCONFIG_REG
 * packet on @ring that writes 0xDEADBEEF to it, then polls the register
 * (1us per iteration, up to adev->usec_timeout iterations) until the
 * token shows up — proving the CP fetched and executed the ring contents.
 *
 * Returns 0 on success, -EINVAL on timeout, or the error from scratch
 * allocation / ring locking.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Pre-load a sentinel so we can tell whether the CP wrote the reg. */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3-dword packet: write 0xDEADBEEF to the scratch register. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
 863
/*
 * gfx_v8_0_ring_test_ib - sanity test indirect buffer submission
 * @ring: the ring to submit on
 * @timeout: fence wait timeout in jiffies (passed to dma_fence_wait_timeout)
 *
 * Same token scheme as the ring test, but the SET_UCONFIG_REG write is
 * placed in an indirect buffer scheduled through the normal IB path, and
 * completion is detected by waiting on the submission fence rather than
 * polling.  Verifies the whole IB fetch/execute/fence pipeline works.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals, -EINVAL if
 * the token never lands, or a negative error from allocation/scheduling.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* Single write-register packet inside the IB. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* 0 = timed out, <0 = wait error, >0 = fence signalled in time. */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
 919
 920
 921static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 922{
 923        release_firmware(adev->gfx.pfp_fw);
 924        adev->gfx.pfp_fw = NULL;
 925        release_firmware(adev->gfx.me_fw);
 926        adev->gfx.me_fw = NULL;
 927        release_firmware(adev->gfx.ce_fw);
 928        adev->gfx.ce_fw = NULL;
 929        release_firmware(adev->gfx.rlc_fw);
 930        adev->gfx.rlc_fw = NULL;
 931        release_firmware(adev->gfx.mec_fw);
 932        adev->gfx.mec_fw = NULL;
 933        if ((adev->asic_type != CHIP_STONEY) &&
 934            (adev->asic_type != CHIP_TOPAZ))
 935                release_firmware(adev->gfx.mec2_fw);
 936        adev->gfx.mec2_fw = NULL;
 937
 938        kfree(adev->gfx.rlc.register_list_format);
 939}
 940
 941static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 942{
 943        const char *chip_name;
 944        char fw_name[30];
 945        int err;
 946        struct amdgpu_firmware_info *info = NULL;
 947        const struct common_firmware_header *header = NULL;
 948        const struct gfx_firmware_header_v1_0 *cp_hdr;
 949        const struct rlc_firmware_header_v2_0 *rlc_hdr;
 950        unsigned int *tmp = NULL, i;
 951
 952        DRM_DEBUG("\n");
 953
 954        switch (adev->asic_type) {
 955        case CHIP_TOPAZ:
 956                chip_name = "topaz";
 957                break;
 958        case CHIP_TONGA:
 959                chip_name = "tonga";
 960                break;
 961        case CHIP_CARRIZO:
 962                chip_name = "carrizo";
 963                break;
 964        case CHIP_FIJI:
 965                chip_name = "fiji";
 966                break;
 967        case CHIP_STONEY:
 968                chip_name = "stoney";
 969                break;
 970        case CHIP_POLARIS10:
 971                chip_name = "polaris10";
 972                break;
 973        case CHIP_POLARIS11:
 974                chip_name = "polaris11";
 975                break;
 976        case CHIP_POLARIS12:
 977                chip_name = "polaris12";
 978                break;
 979        case CHIP_VEGAM:
 980                chip_name = "vegam";
 981                break;
 982        default:
 983                BUG();
 984        }
 985
 986        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 987                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
 988                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 989                if (err == -ENOENT) {
 990                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 991                        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 992                }
 993        } else {
 994                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 995                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 996        }
 997        if (err)
 998                goto out;
 999        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1000        if (err)
1001                goto out;
1002        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1003        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1004        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1005
1006        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1007                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1008                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1009                if (err == -ENOENT) {
1010                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1011                        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1012                }
1013        } else {
1014                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016        }
1017        if (err)
1018                goto out;
1019        err = amdgpu_ucode_validate(adev->gfx.me_fw);
1020        if (err)
1021                goto out;
1022        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1023        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024
1025        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1026
1027        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1028                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1029                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1030                if (err == -ENOENT) {
1031                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1032                        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1033                }
1034        } else {
1035                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037        }
1038        if (err)
1039                goto out;
1040        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1041        if (err)
1042                goto out;
1043        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1044        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1045        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1046
1047        /*
1048         * Support for MCBP/Virtualization in combination with chained IBs is
1049         * formal released on feature version #46
1050         */
1051        if (adev->gfx.ce_feature_version >= 46 &&
1052            adev->gfx.pfp_feature_version >= 46) {
1053                adev->virt.chained_ib_support = true;
1054                DRM_INFO("Chained IB support enabled!\n");
1055        } else
1056                adev->virt.chained_ib_support = false;
1057
1058        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1059        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1060        if (err)
1061                goto out;
1062        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1063        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1064        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1065        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1066
1067        adev->gfx.rlc.save_and_restore_offset =
1068                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
1069        adev->gfx.rlc.clear_state_descriptor_offset =
1070                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1071        adev->gfx.rlc.avail_scratch_ram_locations =
1072                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1073        adev->gfx.rlc.reg_restore_list_size =
1074                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
1075        adev->gfx.rlc.reg_list_format_start =
1076                        le32_to_cpu(rlc_hdr->reg_list_format_start);
1077        adev->gfx.rlc.reg_list_format_separate_start =
1078                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1079        adev->gfx.rlc.starting_offsets_start =
1080                        le32_to_cpu(rlc_hdr->starting_offsets_start);
1081        adev->gfx.rlc.reg_list_format_size_bytes =
1082                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1083        adev->gfx.rlc.reg_list_size_bytes =
1084                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1085
1086        adev->gfx.rlc.register_list_format =
1087                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1088                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1089
1090        if (!adev->gfx.rlc.register_list_format) {
1091                err = -ENOMEM;
1092                goto out;
1093        }
1094
1095        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1096                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1097        for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1098                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1099
1100        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1101
1102        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1103                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1104        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1105                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1106
1107        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1108                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1109                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1110                if (err == -ENOENT) {
1111                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1112                        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1113                }
1114        } else {
1115                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117        }
1118        if (err)
1119                goto out;
1120        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1121        if (err)
1122                goto out;
1123        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1124        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1125        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1126
1127        if ((adev->asic_type != CHIP_STONEY) &&
1128            (adev->asic_type != CHIP_TOPAZ)) {
1129                if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1130                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1131                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1132                        if (err == -ENOENT) {
1133                                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1134                                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1135                        }
1136                } else {
1137                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                }
1140                if (!err) {
1141                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1142                        if (err)
1143                                goto out;
1144                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1145                                adev->gfx.mec2_fw->data;
1146                        adev->gfx.mec2_fw_version =
1147                                le32_to_cpu(cp_hdr->header.ucode_version);
1148                        adev->gfx.mec2_feature_version =
1149                                le32_to_cpu(cp_hdr->ucode_feature_version);
1150                } else {
1151                        err = 0;
1152                        adev->gfx.mec2_fw = NULL;
1153                }
1154        }
1155
1156        if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1157                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1158                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1159                info->fw = adev->gfx.pfp_fw;
1160                header = (const struct common_firmware_header *)info->fw->data;
1161                adev->firmware.fw_size +=
1162                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1163
1164                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1165                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1166                info->fw = adev->gfx.me_fw;
1167                header = (const struct common_firmware_header *)info->fw->data;
1168                adev->firmware.fw_size +=
1169                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170
1171                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1172                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1173                info->fw = adev->gfx.ce_fw;
1174                header = (const struct common_firmware_header *)info->fw->data;
1175                adev->firmware.fw_size +=
1176                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177
1178                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1179                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1180                info->fw = adev->gfx.rlc_fw;
1181                header = (const struct common_firmware_header *)info->fw->data;
1182                adev->firmware.fw_size +=
1183                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184
1185                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1186                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1187                info->fw = adev->gfx.mec_fw;
1188                header = (const struct common_firmware_header *)info->fw->data;
1189                adev->firmware.fw_size +=
1190                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191
1192                /* we need account JT in */
1193                cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1194                adev->firmware.fw_size +=
1195                        ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1196
1197                if (amdgpu_sriov_vf(adev)) {
1198                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1199                        info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1200                        info->fw = adev->gfx.mec_fw;
1201                        adev->firmware.fw_size +=
1202                                ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1203                }
1204
1205                if (adev->gfx.mec2_fw) {
1206                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1207                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1208                        info->fw = adev->gfx.mec2_fw;
1209                        header = (const struct common_firmware_header *)info->fw->data;
1210                        adev->firmware.fw_size +=
1211                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1212                }
1213
1214        }
1215
1216out:
1217        if (err) {
1218                dev_err(adev->dev,
1219                        "gfx8: Failed to load firmware \"%s\"\n",
1220                        fw_name);
1221                release_firmware(adev->gfx.pfp_fw);
1222                adev->gfx.pfp_fw = NULL;
1223                release_firmware(adev->gfx.me_fw);
1224                adev->gfx.me_fw = NULL;
1225                release_firmware(adev->gfx.ce_fw);
1226                adev->gfx.ce_fw = NULL;
1227                release_firmware(adev->gfx.rlc_fw);
1228                adev->gfx.rlc_fw = NULL;
1229                release_firmware(adev->gfx.mec_fw);
1230                adev->gfx.mec_fw = NULL;
1231                release_firmware(adev->gfx.mec2_fw);
1232                adev->gfx.mec2_fw = NULL;
1233        }
1234        return err;
1235}
1236
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state buffer (CSB) contents
 * @adev: amdgpu device pointer
 * @buffer: destination (little-endian dwords), typically the RLC CSB BO
 *
 * Serializes adev->gfx.rlc.cs_data into a PM4 packet stream bracketed by
 * PREAMBLE begin/end markers: context control, every SECT_CONTEXT register
 * extent, the raster config pair, and a final CLEAR_STATE.  Packet order
 * is part of the hardware contract — do not reorder.  Silently returns if
 * either pointer is NULL.  NOTE(review): @buffer is assumed large enough
 * for the stream; callers size it via gfx_v8_0_get_csb_size().
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* Only context-register sections are emitted; any other section
	 * type aborts the dump (stream would be malformed otherwise). */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1283
1284static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1285{
1286        const __le32 *fw_data;
1287        volatile u32 *dst_ptr;
1288        int me, i, max_me = 4;
1289        u32 bo_offset = 0;
1290        u32 table_offset, table_size;
1291
1292        if (adev->asic_type == CHIP_CARRIZO)
1293                max_me = 5;
1294
1295        /* write the cp table buffer */
1296        dst_ptr = adev->gfx.rlc.cp_table_ptr;
1297        for (me = 0; me < max_me; me++) {
1298                if (me == 0) {
1299                        const struct gfx_firmware_header_v1_0 *hdr =
1300                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1301                        fw_data = (const __le32 *)
1302                                (adev->gfx.ce_fw->data +
1303                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1304                        table_offset = le32_to_cpu(hdr->jt_offset);
1305                        table_size = le32_to_cpu(hdr->jt_size);
1306                } else if (me == 1) {
1307                        const struct gfx_firmware_header_v1_0 *hdr =
1308                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1309                        fw_data = (const __le32 *)
1310                                (adev->gfx.pfp_fw->data +
1311                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1312                        table_offset = le32_to_cpu(hdr->jt_offset);
1313                        table_size = le32_to_cpu(hdr->jt_size);
1314                } else if (me == 2) {
1315                        const struct gfx_firmware_header_v1_0 *hdr =
1316                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1317                        fw_data = (const __le32 *)
1318                                (adev->gfx.me_fw->data +
1319                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1320                        table_offset = le32_to_cpu(hdr->jt_offset);
1321                        table_size = le32_to_cpu(hdr->jt_size);
1322                } else if (me == 3) {
1323                        const struct gfx_firmware_header_v1_0 *hdr =
1324                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1325                        fw_data = (const __le32 *)
1326                                (adev->gfx.mec_fw->data +
1327                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1328                        table_offset = le32_to_cpu(hdr->jt_offset);
1329                        table_size = le32_to_cpu(hdr->jt_size);
1330                } else  if (me == 4) {
1331                        const struct gfx_firmware_header_v1_0 *hdr =
1332                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1333                        fw_data = (const __le32 *)
1334                                (adev->gfx.mec2_fw->data +
1335                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1336                        table_offset = le32_to_cpu(hdr->jt_offset);
1337                        table_size = le32_to_cpu(hdr->jt_size);
1338                }
1339
1340                for (i = 0; i < table_size; i ++) {
1341                        dst_ptr[bo_offset + i] =
1342                                cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1343                }
1344
1345                bo_offset += table_size;
1346        }
1347}
1348
/*
 * gfx_v8_0_rlc_fini - free the RLC buffer objects
 * @adev: amdgpu device pointer
 *
 * Releases the clear-state BO and the CP jump-table BO.  Presumably
 * amdgpu_bo_free_kernel() tolerates never-allocated (NULL) handles, since
 * gfx_v8_0_rlc_init() calls this on partial-init failure — TODO confirm.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1354
1355static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1356{
1357        volatile u32 *dst_ptr;
1358        u32 dws;
1359        const struct cs_section_def *cs_data;
1360        int r;
1361
1362        adev->gfx.rlc.cs_data = vi_cs_data;
1363
1364        cs_data = adev->gfx.rlc.cs_data;
1365
1366        if (cs_data) {
1367                /* clear state block */
1368                adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1369
1370                r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1371                                              AMDGPU_GEM_DOMAIN_VRAM,
1372                                              &adev->gfx.rlc.clear_state_obj,
1373                                              &adev->gfx.rlc.clear_state_gpu_addr,
1374                                              (void **)&adev->gfx.rlc.cs_ptr);
1375                if (r) {
1376                        dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1377                        gfx_v8_0_rlc_fini(adev);
1378                        return r;
1379                }
1380
1381                /* set up the cs buffer */
1382                dst_ptr = adev->gfx.rlc.cs_ptr;
1383                gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1384                amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1385                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1386        }
1387
1388        if ((adev->asic_type == CHIP_CARRIZO) ||
1389            (adev->asic_type == CHIP_STONEY)) {
1390                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1391                r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1392                                              PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1393                                              &adev->gfx.rlc.cp_table_obj,
1394                                              &adev->gfx.rlc.cp_table_gpu_addr,
1395                                              (void **)&adev->gfx.rlc.cp_table_ptr);
1396                if (r) {
1397                        dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1398                        return r;
1399                }
1400
1401                cz_init_cp_jump_table(adev);
1402
1403                amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1404                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1405        }
1406
1407        return 0;
1408}
1409
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object
 * @adev: amdgpu device pointer
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1414
1415static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1416{
1417        int r;
1418        u32 *hpd;
1419        size_t mec_hpd_size;
1420
1421        bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1422
1423        /* take ownership of the relevant compute queues */
1424        amdgpu_gfx_compute_queue_acquire(adev);
1425
1426        mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1427
1428        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1429                                      AMDGPU_GEM_DOMAIN_GTT,
1430                                      &adev->gfx.mec.hpd_eop_obj,
1431                                      &adev->gfx.mec.hpd_eop_gpu_addr,
1432                                      (void **)&hpd);
1433        if (r) {
1434                dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1435                return r;
1436        }
1437
1438        memset(hpd, 0, mec_hpd_size);
1439
1440        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1441        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1442
1443        return 0;
1444}
1445
/*
 * Raw GCN machine code for the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to touch the VGPRs.
 * Hand-assembled — do not edit the words; ends with what appear to be
 * s_barrier (0xbf8a0000) and s_endpgm (0xbf810000) — confirm against the
 * GCN3 ISA listing before relying on that.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1482
/*
 * Raw GCN machine code for the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to touch the SGPRs (shared by both
 * SGPR dispatches).  Hand-assembled — do not edit the words.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1507
/*
 * Register/value pairs programmed via SET_SH_REG before dispatching
 * vgpr_init_compute_shader in gfx_v8_0_do_edc_gpr_workarounds().
 * Consumed two entries at a time: { register offset, value }.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1528
/*
 * Register/value pairs for the first SGPR dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 thread-mgmt mask 0x0f,
 * i.e. the lower CUs).  Consumed two entries at a time.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1549
/*
 * Register/value pairs for the second SGPR dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 thread-mgmt mask 0xf0,
 * i.e. the upper CUs).  Consumed two entries at a time.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1570
/*
 * EDC SEC/DED error-counter registers.  Read back (and thereby cleared —
 * see the loop at the end of gfx_v8_0_do_edc_gpr_workarounds()) after the
 * GPR-init dispatches complete.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1599
/*
 * gfx_v8_0_do_edc_gpr_workarounds - run the Carrizo EDC GPR init sequence
 * @adev: amdgpu device pointer
 *
 * Builds a single indirect buffer containing three compute dispatches
 * (one VGPR-init, two SGPR-init — see the *_init_regs / *_init_compute_shader
 * tables above), submits it on compute ring 0 and waits for completion,
 * then enables EDC (DED_MODE/PROP_FED in GB_EDC_MODE) and reads back the
 * SEC/DED counter registers.
 *
 * Returns 0 on success (or when skipped), negative error code on failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable EDC while initializing */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* size the IB: per dispatch, each reg pair costs 3 dwords
	 * (SET_SH_REG header + offset + value), plus 4 for PGM_LO/HI,
	 * 5 for DISPATCH_DIRECT and 2 for the CS-partial-flush event */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	/* shader code lives in the same IB, after the commands; PGM_LO/HI
	 * need 256-byte alignment (address is programmed >> 8) */
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		/* NOTE(review): returning here leaves GB_EDC_MODE forced to 0
		 * instead of restoring the saved value — verify intended */
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC on top of the previously saved mode */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	/* NOTE(review): clearing DIS_EDC and then OR-ing 1 back in looks
	 * contradictory (bit 0 is presumably DIS_EDC) — confirm intended */
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1762
1763static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1764{
1765        u32 gb_addr_config;
1766        u32 mc_shared_chmap, mc_arb_ramcfg;
1767        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1768        u32 tmp;
1769        int ret;
1770
1771        switch (adev->asic_type) {
1772        case CHIP_TOPAZ:
1773                adev->gfx.config.max_shader_engines = 1;
1774                adev->gfx.config.max_tile_pipes = 2;
1775                adev->gfx.config.max_cu_per_sh = 6;
1776                adev->gfx.config.max_sh_per_se = 1;
1777                adev->gfx.config.max_backends_per_se = 2;
1778                adev->gfx.config.max_texture_channel_caches = 2;
1779                adev->gfx.config.max_gprs = 256;
1780                adev->gfx.config.max_gs_threads = 32;
1781                adev->gfx.config.max_hw_contexts = 8;
1782
1783                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1784                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1785                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1786                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1787                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1788                break;
1789        case CHIP_FIJI:
1790                adev->gfx.config.max_shader_engines = 4;
1791                adev->gfx.config.max_tile_pipes = 16;
1792                adev->gfx.config.max_cu_per_sh = 16;
1793                adev->gfx.config.max_sh_per_se = 1;
1794                adev->gfx.config.max_backends_per_se = 4;
1795                adev->gfx.config.max_texture_channel_caches = 16;
1796                adev->gfx.config.max_gprs = 256;
1797                adev->gfx.config.max_gs_threads = 32;
1798                adev->gfx.config.max_hw_contexts = 8;
1799
1800                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1801                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1802                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1803                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1804                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1805                break;
1806        case CHIP_POLARIS11:
1807        case CHIP_POLARIS12:
1808                ret = amdgpu_atombios_get_gfx_info(adev);
1809                if (ret)
1810                        return ret;
1811                adev->gfx.config.max_gprs = 256;
1812                adev->gfx.config.max_gs_threads = 32;
1813                adev->gfx.config.max_hw_contexts = 8;
1814
1815                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1816                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1817                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1818                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1819                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1820                break;
1821        case CHIP_POLARIS10:
1822        case CHIP_VEGAM:
1823                ret = amdgpu_atombios_get_gfx_info(adev);
1824                if (ret)
1825                        return ret;
1826                adev->gfx.config.max_gprs = 256;
1827                adev->gfx.config.max_gs_threads = 32;
1828                adev->gfx.config.max_hw_contexts = 8;
1829
1830                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1831                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1832                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1833                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1834                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1835                break;
1836        case CHIP_TONGA:
1837                adev->gfx.config.max_shader_engines = 4;
1838                adev->gfx.config.max_tile_pipes = 8;
1839                adev->gfx.config.max_cu_per_sh = 8;
1840                adev->gfx.config.max_sh_per_se = 1;
1841                adev->gfx.config.max_backends_per_se = 2;
1842                adev->gfx.config.max_texture_channel_caches = 8;
1843                adev->gfx.config.max_gprs = 256;
1844                adev->gfx.config.max_gs_threads = 32;
1845                adev->gfx.config.max_hw_contexts = 8;
1846
1847                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1852                break;
1853        case CHIP_CARRIZO:
1854                adev->gfx.config.max_shader_engines = 1;
1855                adev->gfx.config.max_tile_pipes = 2;
1856                adev->gfx.config.max_sh_per_se = 1;
1857                adev->gfx.config.max_backends_per_se = 2;
1858                adev->gfx.config.max_cu_per_sh = 8;
1859                adev->gfx.config.max_texture_channel_caches = 2;
1860                adev->gfx.config.max_gprs = 256;
1861                adev->gfx.config.max_gs_threads = 32;
1862                adev->gfx.config.max_hw_contexts = 8;
1863
1864                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1868                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1869                break;
1870        case CHIP_STONEY:
1871                adev->gfx.config.max_shader_engines = 1;
1872                adev->gfx.config.max_tile_pipes = 2;
1873                adev->gfx.config.max_sh_per_se = 1;
1874                adev->gfx.config.max_backends_per_se = 1;
1875                adev->gfx.config.max_cu_per_sh = 3;
1876                adev->gfx.config.max_texture_channel_caches = 2;
1877                adev->gfx.config.max_gprs = 256;
1878                adev->gfx.config.max_gs_threads = 16;
1879                adev->gfx.config.max_hw_contexts = 8;
1880
1881                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1882                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1883                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1884                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1885                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1886                break;
1887        default:
1888                adev->gfx.config.max_shader_engines = 2;
1889                adev->gfx.config.max_tile_pipes = 4;
1890                adev->gfx.config.max_cu_per_sh = 2;
1891                adev->gfx.config.max_sh_per_se = 1;
1892                adev->gfx.config.max_backends_per_se = 2;
1893                adev->gfx.config.max_texture_channel_caches = 4;
1894                adev->gfx.config.max_gprs = 256;
1895                adev->gfx.config.max_gs_threads = 32;
1896                adev->gfx.config.max_hw_contexts = 8;
1897
1898                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1902                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1903                break;
1904        }
1905
1906        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1907        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1908        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1909
1910        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1911        adev->gfx.config.mem_max_burst_length_bytes = 256;
1912        if (adev->flags & AMD_IS_APU) {
1913                /* Get memory bank mapping mode. */
1914                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1915                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1916                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1917
1918                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1919                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1920                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1921
1922                /* Validate settings in case only one DIMM installed. */
1923                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1924                        dimm00_addr_map = 0;
1925                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1926                        dimm01_addr_map = 0;
1927                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1928                        dimm10_addr_map = 0;
1929                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1930                        dimm11_addr_map = 0;
1931
1932                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1933                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1934                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1935                        adev->gfx.config.mem_row_size_in_kb = 2;
1936                else
1937                        adev->gfx.config.mem_row_size_in_kb = 1;
1938        } else {
1939                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1940                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1941                if (adev->gfx.config.mem_row_size_in_kb > 4)
1942                        adev->gfx.config.mem_row_size_in_kb = 4;
1943        }
1944
1945        adev->gfx.config.shader_engine_tile_size = 32;
1946        adev->gfx.config.num_gpus = 1;
1947        adev->gfx.config.multi_gpu_tile_size = 64;
1948
1949        /* fix up row size */
1950        switch (adev->gfx.config.mem_row_size_in_kb) {
1951        case 1:
1952        default:
1953                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1954                break;
1955        case 2:
1956                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1957                break;
1958        case 4:
1959                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1960                break;
1961        }
1962        adev->gfx.config.gb_addr_config = gb_addr_config;
1963
1964        return 0;
1965}
1966
1967static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1968                                        int mec, int pipe, int queue)
1969{
1970        int r;
1971        unsigned irq_type;
1972        struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1973
1974        ring = &adev->gfx.compute_ring[ring_id];
1975
1976        /* mec0 is me1 */
1977        ring->me = mec + 1;
1978        ring->pipe = pipe;
1979        ring->queue = queue;
1980
1981        ring->ring_obj = NULL;
1982        ring->use_doorbell = true;
1983        ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1984        ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1985                                + (ring_id * GFX8_MEC_HPD_SIZE);
1986        sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1987
1988        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1989                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1990                + ring->pipe;
1991
1992        /* type-2 packets are deprecated on MEC, use type-3 instead */
1993        r = amdgpu_ring_init(adev, ring, 1024,
1994                        &adev->gfx.eop_irq, irq_type);
1995        if (r)
1996                return r;
1997
1998
1999        return 0;
2000}
2001
/*
 * gfx_v8_0_sw_init - software-side init for the GFX 8 IP block
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Registers the GFX interrupt sources, loads microcode, allocates the
 * RLC/MEC/KIQ buffer objects, creates the gfx and compute rings, and
 * reserves the GDS/GWS/OA partitions.  The steps are order-dependent:
 * rings depend on the MEC BOs, the MQDs depend on the rings.
 *
 * Returns 0 on success or a negative error code; no rollback is done
 * here on failure (teardown happens via gfx_v8_0_sw_fini()).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs (compute micro engines) varies per ASIC */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	/* NOTE(review): 178/181/184/185 are the VI interrupt source ids for
	 * KIQ, CP EOP, privileged-register and privileged-instruction faults
	 * respectively — presumably matching the VISLANDS30_IV_SRCID_* table;
	 * confirm against the interrupt source header. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* MEC BOs must exist before compute rings can claim EOP slices */
	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	/* loop order (mec, queue, pipe) spreads consecutive ring ids across
	 * pipes rather than filling one pipe's queues first */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* constant engine RAM size: 32KB on all gfx8 parts */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2153
/*
 * gfx_v8_0_sw_fini - software-side teardown for the GFX 8 IP block
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Releases everything gfx_v8_0_sw_init() created, roughly in reverse
 * order: GDS/GWS/OA BOs, gfx and compute rings, MQDs, KIQ, MEC and RLC
 * buffers, clear-state BO, and finally the loaded microcode.
 *
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	/* MQDs before the KIQ ring/BOs they were created for */
	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* only CZ/ST allocate an RLC cp_table BO — presumably for GFX
	 * power gating; only those ASICs need it freed here */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2187
2188static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2189{
2190        uint32_t *modearray, *mod2array;
2191        const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2192        const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2193        u32 reg_offset;
2194
2195        modearray = adev->gfx.config.tile_mode_array;
2196        mod2array = adev->gfx.config.macrotile_mode_array;
2197
2198        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2199                modearray[reg_offset] = 0;
2200
2201        for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2202                mod2array[reg_offset] = 0;
2203
2204        switch (adev->asic_type) {
2205        case CHIP_TOPAZ:
2206                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207                                PIPE_CONFIG(ADDR_SURF_P2) |
2208                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2209                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2210                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211                                PIPE_CONFIG(ADDR_SURF_P2) |
2212                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2213                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2214                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215                                PIPE_CONFIG(ADDR_SURF_P2) |
2216                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2217                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2218                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                PIPE_CONFIG(ADDR_SURF_P2) |
2220                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2221                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2222                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2223                                PIPE_CONFIG(ADDR_SURF_P2) |
2224                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2225                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2226                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227                                PIPE_CONFIG(ADDR_SURF_P2) |
2228                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2229                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2231                                PIPE_CONFIG(ADDR_SURF_P2) |
2232                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2233                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2234                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2235                                PIPE_CONFIG(ADDR_SURF_P2));
2236                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2237                                PIPE_CONFIG(ADDR_SURF_P2) |
2238                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2239                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2240                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241                                 PIPE_CONFIG(ADDR_SURF_P2) |
2242                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2243                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2245                                 PIPE_CONFIG(ADDR_SURF_P2) |
2246                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2247                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2248                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                 PIPE_CONFIG(ADDR_SURF_P2) |
2250                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2251                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2252                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253                                 PIPE_CONFIG(ADDR_SURF_P2) |
2254                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2255                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2256                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2257                                 PIPE_CONFIG(ADDR_SURF_P2) |
2258                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2259                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2260                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2261                                 PIPE_CONFIG(ADDR_SURF_P2) |
2262                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2263                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2264                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2265                                 PIPE_CONFIG(ADDR_SURF_P2) |
2266                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2267                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2269                                 PIPE_CONFIG(ADDR_SURF_P2) |
2270                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2271                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2272                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2273                                 PIPE_CONFIG(ADDR_SURF_P2) |
2274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2275                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2276                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2277                                 PIPE_CONFIG(ADDR_SURF_P2) |
2278                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2279                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2280                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2281                                 PIPE_CONFIG(ADDR_SURF_P2) |
2282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2283                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2284                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2285                                 PIPE_CONFIG(ADDR_SURF_P2) |
2286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2289                                 PIPE_CONFIG(ADDR_SURF_P2) |
2290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2293                                 PIPE_CONFIG(ADDR_SURF_P2) |
2294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2295                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2296                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2297                                 PIPE_CONFIG(ADDR_SURF_P2) |
2298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2299                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2300                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301                                 PIPE_CONFIG(ADDR_SURF_P2) |
2302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2303                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2305                                 PIPE_CONFIG(ADDR_SURF_P2) |
2306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2307                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2308
2309                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2310                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2311                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312                                NUM_BANKS(ADDR_SURF_8_BANK));
2313                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2314                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2315                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2316                                NUM_BANKS(ADDR_SURF_8_BANK));
2317                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2318                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2319                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2320                                NUM_BANKS(ADDR_SURF_8_BANK));
2321                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2323                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2324                                NUM_BANKS(ADDR_SURF_8_BANK));
2325                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2327                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2328                                NUM_BANKS(ADDR_SURF_8_BANK));
2329                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2331                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2332                                NUM_BANKS(ADDR_SURF_8_BANK));
2333                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2335                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2336                                NUM_BANKS(ADDR_SURF_8_BANK));
2337                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2338                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2339                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2340                                NUM_BANKS(ADDR_SURF_16_BANK));
2341                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2342                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2344                                NUM_BANKS(ADDR_SURF_16_BANK));
2345                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2346                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2348                                 NUM_BANKS(ADDR_SURF_16_BANK));
2349                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2350                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2352                                 NUM_BANKS(ADDR_SURF_16_BANK));
2353                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2355                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2356                                 NUM_BANKS(ADDR_SURF_16_BANK));
2357                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360                                 NUM_BANKS(ADDR_SURF_16_BANK));
2361                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                 NUM_BANKS(ADDR_SURF_8_BANK));
2365
2366                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2367                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2368                            reg_offset != 23)
2369                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2370
2371                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2372                        if (reg_offset != 7)
2373                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2374
2375                break;
2376        case CHIP_FIJI:
2377        case CHIP_VEGAM:
2378                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2381                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2385                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2386                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2389                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2390                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2391                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2393                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2394                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2397                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2398                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2401                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2402                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2405                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2408                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2409                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2410                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2411                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2412                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2429                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2435                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2437                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2439                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2443                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2448                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2449                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2452                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2453                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2455                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2456                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2457                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2459                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2460                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2461                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2463                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2465                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2469                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2470                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2471                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2473                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2475                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2477                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2481                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2483                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2484                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2485                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2487                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2491                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2499                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500
2501                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504                                NUM_BANKS(ADDR_SURF_8_BANK));
2505                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508                                NUM_BANKS(ADDR_SURF_8_BANK));
2509                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512                                NUM_BANKS(ADDR_SURF_8_BANK));
2513                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2516                                NUM_BANKS(ADDR_SURF_8_BANK));
2517                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520                                NUM_BANKS(ADDR_SURF_8_BANK));
2521                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524                                NUM_BANKS(ADDR_SURF_8_BANK));
2525                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2528                                NUM_BANKS(ADDR_SURF_8_BANK));
2529                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2531                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532                                NUM_BANKS(ADDR_SURF_8_BANK));
2533                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2535                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2536                                NUM_BANKS(ADDR_SURF_8_BANK));
2537                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2539                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2540                                 NUM_BANKS(ADDR_SURF_8_BANK));
2541                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2543                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2544                                 NUM_BANKS(ADDR_SURF_8_BANK));
2545                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2547                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548                                 NUM_BANKS(ADDR_SURF_8_BANK));
2549                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2551                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552                                 NUM_BANKS(ADDR_SURF_8_BANK));
2553                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2555                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2556                                 NUM_BANKS(ADDR_SURF_4_BANK));
2557
2558                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2559                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2560
2561                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2562                        if (reg_offset != 7)
2563                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2564
2565                break;
2566        case CHIP_TONGA:
2567                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2570                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2574                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2575                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2578                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2579                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2582                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2586                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2590                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2591                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2592                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2594                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2596                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2597                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2598                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2600                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2601                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2618                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2626                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2628                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2636                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2637                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2638                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2640                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2641                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2642                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2644                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2645                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2646                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2648                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2649                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2650                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2651                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2652                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2653                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2654                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2656                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2658                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2660                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2662                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2665                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2666                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2668                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2670                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2672                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2673                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2680                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2684                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689
2690                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693                                NUM_BANKS(ADDR_SURF_16_BANK));
2694                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2695                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2696                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2697                                NUM_BANKS(ADDR_SURF_16_BANK));
2698                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2700                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2701                                NUM_BANKS(ADDR_SURF_16_BANK));
2702                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2705                                NUM_BANKS(ADDR_SURF_16_BANK));
2706                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2708                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2709                                NUM_BANKS(ADDR_SURF_16_BANK));
2710                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2713                                NUM_BANKS(ADDR_SURF_16_BANK));
2714                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2717                                NUM_BANKS(ADDR_SURF_16_BANK));
2718                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2720                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721                                NUM_BANKS(ADDR_SURF_16_BANK));
2722                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2725                                NUM_BANKS(ADDR_SURF_16_BANK));
2726                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2728                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729                                 NUM_BANKS(ADDR_SURF_16_BANK));
2730                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2733                                 NUM_BANKS(ADDR_SURF_16_BANK));
2734                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737                                 NUM_BANKS(ADDR_SURF_8_BANK));
2738                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741                                 NUM_BANKS(ADDR_SURF_4_BANK));
2742                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2744                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2745                                 NUM_BANKS(ADDR_SURF_4_BANK));
2746
2747                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2748                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2749
2750                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2751                        if (reg_offset != 7)
2752                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2753
2754                break;
2755        case CHIP_POLARIS11:
2756        case CHIP_POLARIS12:
2757                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2760                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2761                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2764                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2765                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2768                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2769                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2772                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2773                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2776                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2777                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2780                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2781                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2782                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2784                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2785                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2786                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2788                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2789                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2790                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
2791                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2802                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2806                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2808                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2810                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2816                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2826                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2827                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2828                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2830                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2831                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2832                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2834                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2835                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2836                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2838                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2839                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2840                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2842                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2843                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2844                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2846                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2847                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2848                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2850                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2851                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2852                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2855                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2856                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2858                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2859                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2860                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2862                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2863                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2864                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2870                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2874                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2875                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2876                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2879
2880                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2882                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2883                                NUM_BANKS(ADDR_SURF_16_BANK));
2884
2885                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2887                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2888                                NUM_BANKS(ADDR_SURF_16_BANK));
2889
2890                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2892                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2893                                NUM_BANKS(ADDR_SURF_16_BANK));
2894
2895                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2897                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898                                NUM_BANKS(ADDR_SURF_16_BANK));
2899
2900                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2903                                NUM_BANKS(ADDR_SURF_16_BANK));
2904
2905                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2907                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2908                                NUM_BANKS(ADDR_SURF_16_BANK));
2909
2910                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2913                                NUM_BANKS(ADDR_SURF_16_BANK));
2914
2915                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2917                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918                                NUM_BANKS(ADDR_SURF_16_BANK));
2919
2920                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2921                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2922                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923                                NUM_BANKS(ADDR_SURF_16_BANK));
2924
2925                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2927                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2928                                NUM_BANKS(ADDR_SURF_16_BANK));
2929
2930                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2932                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933                                NUM_BANKS(ADDR_SURF_16_BANK));
2934
2935                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2937                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2938                                NUM_BANKS(ADDR_SURF_16_BANK));
2939
2940                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2942                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2943                                NUM_BANKS(ADDR_SURF_8_BANK));
2944
2945                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2948                                NUM_BANKS(ADDR_SURF_4_BANK));
2949
2950                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2951                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2952
2953                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2954                        if (reg_offset != 7)
2955                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2956
2957                break;
	case CHIP_POLARIS10:
		/*
		 * GB_TILE_MODE0..30 for Polaris 10.  Every entry programs
		 * PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) except the PRT
		 * alternates at indices 7, 12, 17, 23 and 30, which use
		 * ADDR_SURF_P4_16x16.
		 */
		/* 0..7: depth modes, TILE_SPLIT stepping 64B..2KB */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* 8: linear aligned */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* 9..12: display micro-tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 13..17: thin micro-tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 18..26: thick and xthick array modes */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* 27..30: rotated micro-tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14: bank width/height, macro aspect and
		 * bank count.  Index 7 is deliberately never initialized and
		 * the write loop below skips it, leaving MACROTILE_MODE7 at
		 * its reset value.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program all GB_TILE_MODEn registers */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn; skip 7 (mod2array[7] unset) */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		/*
		 * GB_TILE_MODE table for Stoney.  Every entry uses the 2-pipe
		 * PIPE_CONFIG(ADDR_SURF_P2), so the P4 PRT alternates present
		 * on the larger parts (indices 7, 12, 17, 23) are left unset
		 * here and skipped by the write loop below.
		 */
		/* 0..6: depth modes, TILE_SPLIT stepping 64B..2KB */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* 8: linear aligned */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		/* 9..11: display micro-tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 13..16: thin micro-tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 18..26: thick and xthick array modes */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* 27..29: rotated micro-tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14: bank geometry.  Index 7 is never
		 * initialized and is skipped by the write loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		/* Program GB_TILE_MODEn; 7/12/17/23 were left unset above */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn; skip 7 (mod2array[7] unset) */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);

		/* fall through - unknown ASICs get the Carrizo tables */
3336        case CHIP_CARRIZO:
3337                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3338                                PIPE_CONFIG(ADDR_SURF_P2) |
3339                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3340                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3341                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342                                PIPE_CONFIG(ADDR_SURF_P2) |
3343                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3344                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3345                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346                                PIPE_CONFIG(ADDR_SURF_P2) |
3347                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3348                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3349                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3350                                PIPE_CONFIG(ADDR_SURF_P2) |
3351                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3352                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3353                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354                                PIPE_CONFIG(ADDR_SURF_P2) |
3355                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3356                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3357                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3358                                PIPE_CONFIG(ADDR_SURF_P2) |
3359                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3360                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3361                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362                                PIPE_CONFIG(ADDR_SURF_P2) |
3363                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3364                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3365                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3366                                PIPE_CONFIG(ADDR_SURF_P2));
3367                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3368                                PIPE_CONFIG(ADDR_SURF_P2) |
3369                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3370                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3371                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3372                                 PIPE_CONFIG(ADDR_SURF_P2) |
3373                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3374                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3375                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3376                                 PIPE_CONFIG(ADDR_SURF_P2) |
3377                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3378                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3379                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3380                                 PIPE_CONFIG(ADDR_SURF_P2) |
3381                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384                                 PIPE_CONFIG(ADDR_SURF_P2) |
3385                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3386                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3387                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3388                                 PIPE_CONFIG(ADDR_SURF_P2) |
3389                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3390                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3391                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3392                                 PIPE_CONFIG(ADDR_SURF_P2) |
3393                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3394                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3395                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3396                                 PIPE_CONFIG(ADDR_SURF_P2) |
3397                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3398                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3399                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3400                                 PIPE_CONFIG(ADDR_SURF_P2) |
3401                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3402                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3403                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3404                                 PIPE_CONFIG(ADDR_SURF_P2) |
3405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3406                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3407                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3408                                 PIPE_CONFIG(ADDR_SURF_P2) |
3409                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3410                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3411                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3412                                 PIPE_CONFIG(ADDR_SURF_P2) |
3413                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3414                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3415                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3416                                 PIPE_CONFIG(ADDR_SURF_P2) |
3417                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3419                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3420                                 PIPE_CONFIG(ADDR_SURF_P2) |
3421                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3422                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3423                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3424                                 PIPE_CONFIG(ADDR_SURF_P2) |
3425                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3426                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3427                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3428                                 PIPE_CONFIG(ADDR_SURF_P2) |
3429                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3430                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3431                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3432                                 PIPE_CONFIG(ADDR_SURF_P2) |
3433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3434                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3436                                 PIPE_CONFIG(ADDR_SURF_P2) |
3437                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3438                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3439
3440                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3442                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443                                NUM_BANKS(ADDR_SURF_8_BANK));
3444                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3445                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3446                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3447                                NUM_BANKS(ADDR_SURF_8_BANK));
3448                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3450                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3451                                NUM_BANKS(ADDR_SURF_8_BANK));
3452                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3455                                NUM_BANKS(ADDR_SURF_8_BANK));
3456                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459                                NUM_BANKS(ADDR_SURF_8_BANK));
3460                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3461                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3462                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3463                                NUM_BANKS(ADDR_SURF_8_BANK));
3464                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3465                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3466                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3467                                NUM_BANKS(ADDR_SURF_8_BANK));
3468                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3469                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3470                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3471                                NUM_BANKS(ADDR_SURF_16_BANK));
3472                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3473                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3474                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3475                                NUM_BANKS(ADDR_SURF_16_BANK));
3476                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3477                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479                                 NUM_BANKS(ADDR_SURF_16_BANK));
3480                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3481                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483                                 NUM_BANKS(ADDR_SURF_16_BANK));
3484                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3486                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3487                                 NUM_BANKS(ADDR_SURF_16_BANK));
3488                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3491                                 NUM_BANKS(ADDR_SURF_16_BANK));
3492                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495                                 NUM_BANKS(ADDR_SURF_8_BANK));
3496
3497                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3498                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3499                            reg_offset != 23)
3500                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3501
3502                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3503                        if (reg_offset != 7)
3504                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3505
3506                break;
3507        }
3508}
3509
3510static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3511                                  u32 se_num, u32 sh_num, u32 instance)
3512{
3513        u32 data;
3514
3515        if (instance == 0xffffffff)
3516                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3517        else
3518                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3519
3520        if (se_num == 0xffffffff)
3521                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3522        else
3523                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3524
3525        if (sh_num == 0xffffffff)
3526                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3527        else
3528                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3529
3530        WREG32(mmGRBM_GFX_INDEX, data);
3531}
3532
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	/* Route subsequent register accesses to the given ME/pipe/queue
	 * via the SRBM index; the trailing 0 presumably selects VMID 0 —
	 * consistent with other vi_srbm_select() callers in this file. */
	vi_srbm_select(adev, me, pipe, q, 0);
}
3538
3539static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3540{
3541        u32 data, mask;
3542
3543        data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3544                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3545
3546        data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3547
3548        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3549                                         adev->gfx.config.max_sh_per_se);
3550
3551        return (~data) & mask;
3552}
3553
3554static void
3555gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3556{
3557        switch (adev->asic_type) {
3558        case CHIP_FIJI:
3559        case CHIP_VEGAM:
3560                *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3561                          RB_XSEL2(1) | PKR_MAP(2) |
3562                          PKR_XSEL(1) | PKR_YSEL(1) |
3563                          SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3564                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3565                           SE_PAIR_YSEL(2);
3566                break;
3567        case CHIP_TONGA:
3568        case CHIP_POLARIS10:
3569                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3570                          SE_XSEL(1) | SE_YSEL(1);
3571                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3572                           SE_PAIR_YSEL(2);
3573                break;
3574        case CHIP_TOPAZ:
3575        case CHIP_CARRIZO:
3576                *rconf |= RB_MAP_PKR0(2);
3577                *rconf1 |= 0x0;
3578                break;
3579        case CHIP_POLARIS11:
3580        case CHIP_POLARIS12:
3581                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3582                          SE_XSEL(1) | SE_YSEL(1);
3583                *rconf1 |= 0x0;
3584                break;
3585        case CHIP_STONEY:
3586                *rconf |= 0x0;
3587                *rconf1 |= 0x0;
3588                break;
3589        default:
3590                DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3591                break;
3592        }
3593}
3594
/*
 * Re-distribute the raster configuration when some render backends (RBs)
 * have been harvested, remapping work onto the RBs that remain.  Programs
 * PA_SC_RASTER_CONFIG per shader engine plus the shared
 * PA_SC_RASTER_CONFIG_1, then restores broadcast GRBM indexing.
 * Caller holds grbm_idx_mutex (see gfx_v8_0_setup_rb()).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the active-RB mask into one sub-mask per SE (up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one whole SE pair has no active RBs, point SE_PAIR_MAP at the
	 * surviving pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* first SE of this SE's pair */

		/* If one SE of this pair has no RBs, remap SE_MAP onto the
		 * populated SE. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: remap PKR_MAP when all RBs of one
		 * packer (PKR) in this SE are harvested. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally remap individual RBs inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3703
/*
 * Discover which render backends survived harvesting, program the raster
 * configuration accordingly, and cache the per-SE/SH RB registers for
 * userspace queries.  Takes grbm_idx_mutex around all GRBM index changes.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect the active-RB bitmap across every SE/SH. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Broadcast the default config when nothing is harvested (or the
	 * mask is unusable); otherwise write per-SE harvested configs. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3760
3761/**
3762 * gfx_v8_0_init_compute_vmid - gart enable
3763 *
3764 * @adev: amdgpu_device pointer
3765 *
3766 * Initialize compute vmid sh_mem registers
3767 *
3768 */
3769#define DEFAULT_SH_MEM_BASES    (0x6000)
3770#define FIRST_COMPUTE_VMID      (8)
3771#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for both the private and shared aperture fields */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default MTYPE, private-aperture ATC enabled */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Program every compute VMID, then restore VMID 0 selection. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit — presumably disables APE1; matches
		 * gfx_v8_0_gpu_init(), verify against hw docs */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3805
3806static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3807{
3808        switch (adev->asic_type) {
3809        default:
3810                adev->gfx.config.double_offchip_lds_buf = 1;
3811                break;
3812        case CHIP_CARRIZO:
3813        case CHIP_STONEY:
3814                adev->gfx.config.double_offchip_lds_buf = 0;
3815                break;
3816        }
3817}
3818
/*
 * One-time hardware init of the gfx block: address/tiling config, RB
 * setup, CU info, SH_MEM apertures for every VMID, and the broadcast
 * PA_SC FIFO / SPI arbitration defaults.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	/* Program SH_MEM_CONFIG/SH_MEM_BASES for every VMID. */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default MTYPE, bases at 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: non-coherent default MTYPE, bases
			 * derived from the shared aperture */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base > limit — presumably disables APE1; verify
		 * against hw docs */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* equal arbitration priority for all four pipe-order timestamps */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3900
/*
 * Busy-wait (up to adev->usec_timeout microseconds per unit) until the
 * RLC serdes CU masters of every SE/SH report idle, then wait for the
 * non-CU serdes masters.  Logs and bails out on the first timeout.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast and drop the lock
				 * before returning early */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the SE/GC/TC non-CU masters to go idle */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3938
3939static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3940                                               bool enable)
3941{
3942        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3943
3944        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3945        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3946        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3947        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3948
3949        WREG32(mmCP_INT_CNTL_RING0, tmp);
3950}
3951
3952static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3953{
3954        /* csib */
3955        WREG32(mmRLC_CSIB_ADDR_HI,
3956                        adev->gfx.rlc.clear_state_gpu_addr >> 32);
3957        WREG32(mmRLC_CSIB_ADDR_LO,
3958                        adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3959        WREG32(mmRLC_CSIB_LENGTH,
3960                        adev->gfx.rlc.clear_state_size);
3961}
3962
3963static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3964                                int ind_offset,
3965                                int list_size,
3966                                int *unique_indices,
3967                                int *indices_count,
3968                                int max_indices,
3969                                int *ind_start_offsets,
3970                                int *offset_count,
3971                                int max_offset)
3972{
3973        int indices;
3974        bool new_entry = true;
3975
3976        for (; ind_offset < list_size; ind_offset++) {
3977
3978                if (new_entry) {
3979                        new_entry = false;
3980                        ind_start_offsets[*offset_count] = ind_offset;
3981                        *offset_count = *offset_count + 1;
3982                        BUG_ON(*offset_count >= max_offset);
3983                }
3984
3985                if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3986                        new_entry = true;
3987                        continue;
3988                }
3989
3990                ind_offset += 2;
3991
3992                /* look for the matching indice */
3993                for (indices = 0;
3994                        indices < *indices_count;
3995                        indices++) {
3996                        if (unique_indices[indices] ==
3997                                register_list_format[ind_offset])
3998                                break;
3999                }
4000
4001                if (indices >= *indices_count) {
4002                        unique_indices[*indices_count] =
4003                                register_list_format[ind_offset];
4004                        indices = *indices_count;
4005                        *indices_count = *indices_count + 1;
4006                        BUG_ON(*indices_count >= max_indices);
4007                }
4008
4009                register_list_format[ind_offset] = indices;
4010        }
4011}
4012
/*
 * Build and program the RLC save/restore (SRM) lists used for power gating.
 *
 * Takes a private copy of the firmware-provided register list format,
 * resolves its indirect register references via gfx_v8_0_parse_ind_reg_list(),
 * then streams the direct restore list into SRM ARAM and the processed
 * indirect list (plus its size and starting offsets) into RLC GPM scratch.
 * Finally each unique indirect register index is programmed into an
 * RLC_SRM_INDEX_CNTL_ADDR/DATA register pair.
 *
 * Returns 0 on success, -ENOMEM if the temporary list copy cannot be
 * allocated.  Note: register write ordering follows the RLC programming
 * sequence and must not be rearranged.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a scratch copy: parse_ind_reg_list rewrites entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list: ARAM address auto-increments on data writes */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list, streamed into GPM scratch at the format start offset */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size in dword pairs (size in dwords halved) */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: ADDR_n gets the low 18 bits, DATA_n the bits above
	 * 20 — NOTE(review): exact field split assumed from the masks used
	 * here; confirm against the RLC SRM register spec.
	 */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4076
4077static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4078{
4079        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4080}
4081
4082static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4083{
4084        uint32_t data;
4085
4086        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4087
4088        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4089        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4090        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4091        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4092        WREG32(mmRLC_PG_DELAY, data);
4093
4094        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4095        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4096
4097}
4098
4099static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4100                                                bool enable)
4101{
4102        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4103}
4104
4105static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4106                                                  bool enable)
4107{
4108        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4109}
4110
4111static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4112{
4113        WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4114}
4115
4116static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4117{
4118        if ((adev->asic_type == CHIP_CARRIZO) ||
4119            (adev->asic_type == CHIP_STONEY)) {
4120                gfx_v8_0_init_csb(adev);
4121                gfx_v8_0_init_save_restore_list(adev);
4122                gfx_v8_0_enable_save_restore_machine(adev);
4123                WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4124                gfx_v8_0_init_power_gating(adev);
4125                WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4126        } else if ((adev->asic_type == CHIP_POLARIS11) ||
4127                   (adev->asic_type == CHIP_POLARIS12) ||
4128                   (adev->asic_type == CHIP_VEGAM)) {
4129                gfx_v8_0_init_csb(adev);
4130                gfx_v8_0_init_save_restore_list(adev);
4131                gfx_v8_0_enable_save_restore_machine(adev);
4132                gfx_v8_0_init_power_gating(adev);
4133        }
4134
4135}
4136
/*
 * Halt the RLC: stop the F32 core, mask the GUI idle interrupt, then wait
 * for the RLC serdes to go quiet.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4144
/*
 * Pulse the RLC soft-reset bit in GRBM_SOFT_RESET, holding each edge for
 * 50us to let the reset settle.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4153
/*
 * Start the RLC F32 core.  On APUs the GUI idle interrupt is left off here
 * and enabled only after the CP is initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* give the RLC a moment to come up */
	udelay(50);
}
4164
4165static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4166{
4167        const struct rlc_firmware_header_v2_0 *hdr;
4168        const __le32 *fw_data;
4169        unsigned i, fw_size;
4170
4171        if (!adev->gfx.rlc_fw)
4172                return -EINVAL;
4173
4174        hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4175        amdgpu_ucode_print_rlc_hdr(&hdr->header);
4176
4177        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4178                           le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4179        fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4180
4181        WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4182        for (i = 0; i < fw_size; i++)
4183                WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4184        WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4185
4186        return 0;
4187}
4188
/*
 * Bring the RLC back up: stop it, disable clock gating and power gating,
 * reset, re-init PG state, optionally load the microcode (direct loading
 * only) and finally start the RLC.
 *
 * Returns 0 on success or the error from microcode loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		/* Polaris/VegaM also carry 3D CGCG/CGLS enables in the
		 * low two bits of the 3D control register — clear them too.
		 */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);


	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4228
4229static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4230{
4231        int i;
4232        u32 tmp = RREG32(mmCP_ME_CNTL);
4233
4234        if (enable) {
4235                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4236                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4237                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4238        } else {
4239                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4240                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4241                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4242                for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4243                        adev->gfx.gfx_ring[i].ready = false;
4244        }
4245        WREG32(mmCP_ME_CNTL, tmp);
4246        udelay(50);
4247}
4248
4249static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4250{
4251        const struct gfx_firmware_header_v1_0 *pfp_hdr;
4252        const struct gfx_firmware_header_v1_0 *ce_hdr;
4253        const struct gfx_firmware_header_v1_0 *me_hdr;
4254        const __le32 *fw_data;
4255        unsigned i, fw_size;
4256
4257        if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4258                return -EINVAL;
4259
4260        pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4261                adev->gfx.pfp_fw->data;
4262        ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4263                adev->gfx.ce_fw->data;
4264        me_hdr = (const struct gfx_firmware_header_v1_0 *)
4265                adev->gfx.me_fw->data;
4266
4267        amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4268        amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4269        amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4270
4271        gfx_v8_0_cp_gfx_enable(adev, false);
4272
4273        /* PFP */
4274        fw_data = (const __le32 *)
4275                (adev->gfx.pfp_fw->data +
4276                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4277        fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4278        WREG32(mmCP_PFP_UCODE_ADDR, 0);
4279        for (i = 0; i < fw_size; i++)
4280                WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4281        WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4282
4283        /* CE */
4284        fw_data = (const __le32 *)
4285                (adev->gfx.ce_fw->data +
4286                 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4287        fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4288        WREG32(mmCP_CE_UCODE_ADDR, 0);
4289        for (i = 0; i < fw_size; i++)
4290                WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4291        WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4292
4293        /* ME */
4294        fw_data = (const __le32 *)
4295                (adev->gfx.me_fw->data +
4296                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4297        fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4298        WREG32(mmCP_ME_RAM_WADDR, 0);
4299        for (i = 0; i < fw_size; i++)
4300                WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4301        WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4302
4303        return 0;
4304}
4305
4306static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4307{
4308        u32 count = 0;
4309        const struct cs_section_def *sect = NULL;
4310        const struct cs_extent_def *ext = NULL;
4311
4312        /* begin clear state */
4313        count += 2;
4314        /* context control state */
4315        count += 3;
4316
4317        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4318                for (ext = sect->section; ext->extent != NULL; ++ext) {
4319                        if (sect->id == SECT_CONTEXT)
4320                                count += 2 + ext->reg_count;
4321                        else
4322                                return 0;
4323                }
4324        }
4325        /* pa_sc_raster_config/pa_sc_raster_config1 */
4326        count += 4;
4327        /* end clear state */
4328        count += 2;
4329        /* clear state */
4330        count += 2;
4331
4332        return count;
4333}
4334
/*
 * Initialize the gfx CP and emit the startup sequence on gfx ring 0:
 * clear-state preamble, context control, the SECT_CONTEXT extents from
 * vi_cs_data, raster config, clear-state, and the CE partition bases.
 * The packet order matches gfx_v8_0_get_csb_size() and must not change.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the SET_BASE packet appended after the clear state */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config for SE0/SH0 applies chip-wide here */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4398static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4399{
4400        u32 tmp;
4401        /* no gfx doorbells on iceland */
4402        if (adev->asic_type == CHIP_TOPAZ)
4403                return;
4404
4405        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4406
4407        if (ring->use_doorbell) {
4408                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4409                                DOORBELL_OFFSET, ring->doorbell_index);
4410                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4411                                                DOORBELL_HIT, 0);
4412                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4413                                            DOORBELL_EN, 1);
4414        } else {
4415                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4416        }
4417
4418        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4419
4420        if (adev->flags & AMD_IS_APU)
4421                return;
4422
4423        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4424                                        DOORBELL_RANGE_LOWER,
4425                                        AMDGPU_DOORBELL_GFX_RING0);
4426        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4427
4428        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4429                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4430}
4431
/*
 * Bring up gfx ring 0: program the ring buffer size/pointers/writeback
 * addresses and base, configure the doorbell, then start the ring and run
 * a ring test.  The register write order follows the CP bring-up sequence
 * and must not be rearranged.
 *
 * Returns the ring test result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is held while the pointers are reset.
	 */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without RB_RPTR_WR_ENA after the pointer reset */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is a 256-byte-aligned GPU address */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4489
4490static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4491{
4492        int i;
4493
4494        if (enable) {
4495                WREG32(mmCP_MEC_CNTL, 0);
4496        } else {
4497                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4498                for (i = 0; i < adev->gfx.num_compute_rings; i++)
4499                        adev->gfx.compute_ring[i].ready = false;
4500                adev->gfx.kiq.ring.ready = false;
4501        }
4502        udelay(50);
4503}
4504
4505static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4506{
4507        const struct gfx_firmware_header_v1_0 *mec_hdr;
4508        const __le32 *fw_data;
4509        unsigned i, fw_size;
4510
4511        if (!adev->gfx.mec_fw)
4512                return -EINVAL;
4513
4514        gfx_v8_0_cp_compute_enable(adev, false);
4515
4516        mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4517        amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4518
4519        fw_data = (const __le32 *)
4520                (adev->gfx.mec_fw->data +
4521                 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4522        fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4523
4524        /* MEC1 */
4525        WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4526        for (i = 0; i < fw_size; i++)
4527                WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4528        WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4529
4530        /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4531        if (adev->gfx.mec2_fw) {
4532                const struct gfx_firmware_header_v1_0 *mec2_hdr;
4533
4534                mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4535                amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4536
4537                fw_data = (const __le32 *)
4538                        (adev->gfx.mec2_fw->data +
4539                         le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4540                fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4541
4542                WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4543                for (i = 0; i < fw_size; i++)
4544                        WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4545                WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4546        }
4547
4548        return 0;
4549}
4550
4551/* KIQ functions */
/*
 * Tell the RLC which me/pipe/queue is the KIQ.  The queue id is written
 * first, then bit 0x80 is set in a second write — presumably that bit
 * latches/activates the selection, and the two-step order is required by
 * the RLC (NOTE(review): confirm against the RLC_CP_SCHEDULERS spec).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4565
/*
 * Map all compute queues through the KIQ: emit a SET_RESOURCES packet with
 * the queue mask, one MAP_QUEUES packet per compute ring, then a scratch
 * register write that is polled to confirm the KIQ consumed the stream.
 *
 * Returns 0 on success, -EINVAL on scratch-poll timeout, or the error from
 * scratch allocation / ring locking.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of MEC queues to hand to the HW scheduler */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 11 for SET_RESOURCES (8) and the
	 * completion scratch write (3)
	 */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* oac mask */
	amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll until the KIQ has executed the scratch write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4651
4652static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4653{
4654        int i, r = 0;
4655
4656        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4657                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4658                for (i = 0; i < adev->usec_timeout; i++) {
4659                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4660                                break;
4661                        udelay(1);
4662                }
4663                if (i == adev->usec_timeout)
4664                        r = -ETIMEDOUT;
4665        }
4666        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4667        WREG32(mmCP_HQD_PQ_RPTR, 0);
4668        WREG32(mmCP_HQD_PQ_WPTR, 0);
4669
4670        return r;
4671}
4672
/*
 * Fill in the memory queue descriptor (MQD) for a compute ring: static
 * header/thread-management fields, EOP buffer, MQD and HQD base addresses,
 * queue control, writeback addresses, doorbell control, and snapshots of
 * the current HQD default registers.  The MQD is later committed to the
 * HQD registers (or consumed by the KIQ on map).
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* static header and thread-management defaults: all SEs/CUs enabled */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives inside the MQD allocation itself */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP buffer base is a 256-byte-aligned GPU address */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: capture the current HQD register values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4815
/*
 * gfx_v8_0_mqd_commit - program a prepared MQD image into the HQD registers
 *
 * @adev: amdgpu device pointer
 * @mqd:  memory queue descriptor whose fields mirror the register file from
 *        mmCP_MQD_BASE_ADDR through mmCP_HQD_ERROR, in register order
 *
 * Writes the MQD image to the hardware queue descriptor registers of the
 * currently selected pipe/queue.  Callers select the target queue with
 * vi_srbm_select() under srbm_mutex before calling this (see
 * gfx_v8_0_kiq_init_queue()).  The MQD_BASE..HQD_ACTIVE range is written
 * last because storing CP_HQD_ACTIVE = 1 activates the queue.
 *
 * Returns 0 (cannot fail).
 */
4816int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4817                        struct vi_mqd *mqd)
4818{
4819        uint32_t mqd_reg;
4820        uint32_t *mqd_data;
4821
4822        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4823        mqd_data = &mqd->cp_mqd_base_addr_lo;
4824
4825        /* disable wptr polling */
4826        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4827
4828        /* program all HQD registers */
4829        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4830                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4831
4832        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4833         * This is safe since EOP RPTR==WPTR for any inactive HQD
4834         * on ASICs that do not support context-save.
4835         * EOP writes/reads can start anywhere in the ring.
4836         */
4837        if (adev->asic_type != CHIP_TONGA) {
4838                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4839                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4840                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4841        }
4842
        /* resume programming after the EOP pointer window skipped above */
4843        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4844                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4845
4846        /* activate the HQD */
4847        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4848                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4849
4850        return 0;
4851}
4852
/*
 * gfx_v8_0_kiq_init_queue - initialize (or restore) the KIQ's own queue
 *
 * On a fresh init the MQD is built from scratch with gfx_v8_0_mqd_init(),
 * committed to hardware, and then snapshotted into mqd_backup[].  On a GPU
 * reset the saved snapshot is restored instead of rebuilding, and only the
 * ring buffer state is cleared.  The KIQ backup slot is the one past the
 * last compute ring (AMDGPU_MAX_COMPUTE_RINGS).
 *
 * Returns 0 (cannot fail).
 */
4853static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4854{
4855        struct amdgpu_device *adev = ring->adev;
4856        struct vi_mqd *mqd = ring->mqd_ptr;
4857        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4858
        /* route KIQ interrupts / configure CP for this kernel interface queue */
4859        gfx_v8_0_kiq_setting(ring);
4860
4861        if (adev->in_gpu_reset) { /* for GPU_RESET case */
4862                /* reset MQD to a clean status */
4863                if (adev->gfx.mec.mqd_backup[mqd_idx])
4864                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4865
4866                /* reset ring buffer */
4867                ring->wptr = 0;
4868                amdgpu_ring_clear_ring(ring);
                /* srbm_mutex serializes the pipe/queue selection below */
4869                mutex_lock(&adev->srbm_mutex);
4870                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4871                gfx_v8_0_mqd_commit(adev, mqd);
4872                vi_srbm_select(adev, 0, 0, 0, 0);
4873                mutex_unlock(&adev->srbm_mutex);
4874        } else {
                /* first-time init: build the MQD image from zeroed memory */
4875                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4876                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4877                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4878                mutex_lock(&adev->srbm_mutex);
4879                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4880                gfx_v8_0_mqd_init(ring);
4881                gfx_v8_0_mqd_commit(adev, mqd);
4882                vi_srbm_select(adev, 0, 0, 0, 0);
4883                mutex_unlock(&adev->srbm_mutex);
4884
                /* snapshot the freshly built MQD for later GPU-reset restore */
4885                if (adev->gfx.mec.mqd_backup[mqd_idx])
4886                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4887        }
4888
4889        return 0;
4890}
4891
/*
 * gfx_v8_0_kcq_init_queue - initialize one kernel compute queue's MQD
 *
 * Unlike the KIQ path, this only builds the MQD image; the queue is not
 * committed here — it is mapped later through the KIQ (see
 * gfx_v8_0_kiq_kcq_enable() called from gfx_v8_0_kiq_resume()).
 * Three cases: fresh init (build + backup), GPU reset (restore backup,
 * clear ring), and resume-from-suspend (just clear the ring; the MQD in
 * the BO is still valid).
 *
 * Returns 0 (cannot fail).
 */
4892static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4893{
4894        struct amdgpu_device *adev = ring->adev;
4895        struct vi_mqd *mqd = ring->mqd_ptr;
        /* backup slot index = position of this ring in compute_ring[] */
4896        int mqd_idx = ring - &adev->gfx.compute_ring[0];
4897
4898        if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4899                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4900                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4901                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4902                mutex_lock(&adev->srbm_mutex);
4903                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4904                gfx_v8_0_mqd_init(ring);
4905                vi_srbm_select(adev, 0, 0, 0, 0);
4906                mutex_unlock(&adev->srbm_mutex);
4907
4908                if (adev->gfx.mec.mqd_backup[mqd_idx])
4909                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4910        } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4911                /* reset MQD to a clean status */
4912                if (adev->gfx.mec.mqd_backup[mqd_idx])
4913                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4914                /* reset ring buffer */
4915                ring->wptr = 0;
4916                amdgpu_ring_clear_ring(ring);
4917        } else {
                /* resume from suspend: MQD contents are preserved */
4918                amdgpu_ring_clear_ring(ring);
4919        }
4920        return 0;
4921}
4922
/*
 * gfx_v8_0_set_mec_doorbell_range - program the MEC doorbell aperture
 *
 * Sets the doorbell address window the MEC will accept (KIQ doorbell up to
 * MEC ring 7) and then enables doorbell processing.  The range registers
 * exist only on post-Tonga ASICs, hence the asic_type guard; the enable
 * bit is set unconditionally.
 */
4923static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4924{
4925        if (adev->asic_type > CHIP_TONGA) {
                /* doorbell index is shifted left 2 to form a byte offset */
4926                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4927                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4928        }
4929        /* enable doorbells */
4930        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4931}
4932
/*
 * gfx_v8_0_kiq_resume - bring up the KIQ and all kernel compute queues
 *
 * Sequence: enable the compute CP, initialize the KIQ's MQD, initialize
 * every KCQ's MQD, open the MEC doorbell range, then have the KIQ map the
 * KCQs, and finally ring-test everything.  Each MQD BO must be reserved
 * and kmapped around its init because mqd_ptr is only valid while mapped.
 *
 * Returns 0 on success or the first error encountered.
 */
4933static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4934{
4935        struct amdgpu_ring *ring = NULL;
4936        int r = 0, i;
4937
4938        gfx_v8_0_cp_compute_enable(adev, true);
4939
4940        ring = &adev->gfx.kiq.ring;
4941
4942        r = amdgpu_bo_reserve(ring->mqd_obj, false);
4943        if (unlikely(r != 0))
4944                goto done;
4945
4946        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4947        if (!r) {
4948                r = gfx_v8_0_kiq_init_queue(ring);
4949                amdgpu_bo_kunmap(ring->mqd_obj);
4950                ring->mqd_ptr = NULL;
4951        }
        /* unreserve regardless of kmap/init outcome, then bail on error */
4952        amdgpu_bo_unreserve(ring->mqd_obj);
4953        if (r)
4954                goto done;
4955
        /* same reserve/kmap/init dance for every kernel compute queue */
4956        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4957                ring = &adev->gfx.compute_ring[i];
4958
4959                r = amdgpu_bo_reserve(ring->mqd_obj, false);
4960                if (unlikely(r != 0))
4961                        goto done;
4962                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4963                if (!r) {
4964                        r = gfx_v8_0_kcq_init_queue(ring);
4965                        amdgpu_bo_kunmap(ring->mqd_obj);
4966                        ring->mqd_ptr = NULL;
4967                }
4968                amdgpu_bo_unreserve(ring->mqd_obj);
4969                if (r)
4970                        goto done;
4971        }
4972
4973        gfx_v8_0_set_mec_doorbell_range(adev);
4974
        /* ask the KIQ to map all KCQs prepared above */
4975        r = gfx_v8_0_kiq_kcq_enable(adev);
4976        if (r)
4977                goto done;
4978
4979        /* Test KIQ */
4980        ring = &adev->gfx.kiq.ring;
4981        ring->ready = true;
4982        r = amdgpu_ring_test_ring(ring);
4983        if (r) {
4984                ring->ready = false;
4985                goto done;
4986        }
4987
4988        /* Test KCQs */
4989        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4990                ring = &adev->gfx.compute_ring[i];
4991                ring->ready = true;
4992                r = amdgpu_ring_test_ring(ring);
4993                if (r)
4994                        ring->ready = false;
4995        }
4996
4997done:
4998        return r;
4999}
5000
/*
 * gfx_v8_0_cp_resume - start the command processors (GFX + compute)
 *
 * Loads CP microcode when direct (legacy) firmware loading is in use,
 * resumes the GFX ring, then resumes the KIQ/compute path.  GUI idle
 * interrupts are masked during the bring-up on dGPUs and re-enabled at
 * the end.
 *
 * Returns 0 on success or a negative error code.
 */
5001static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5002{
5003        int r;
5004
5005        if (!(adev->flags & AMD_IS_APU))
5006                gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5007
5008        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5009                /* legacy firmware loading */
5010                r = gfx_v8_0_cp_gfx_load_microcode(adev);
5011                if (r)
5012                        return r;
5013
5014                r = gfx_v8_0_cp_compute_load_microcode(adev);
5015                if (r)
5016                        return r;
5017        }
5018
5019        r = gfx_v8_0_cp_gfx_resume(adev);
5020        if (r)
5021                return r;
5022
5023        r = gfx_v8_0_kiq_resume(adev);
5024        if (r)
5025                return r;
5026
5027        gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5028
5029        return 0;
5030}
5031
/*
 * gfx_v8_0_cp_enable - enable or disable both CP engines
 *
 * Convenience wrapper toggling the graphics and the compute command
 * processors together, GFX first then compute.
 */
5032static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5033{
5034        gfx_v8_0_cp_gfx_enable(adev, enable);
5035        gfx_v8_0_cp_compute_enable(adev, enable);
5036}
5037
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback for GFX v8
 *
 * Applies the golden register settings, performs base GPU init, then
 * resumes the RLC followed by the command processors.
 *
 * Returns 0 on success or a negative error code from RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP can be brought up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5054
5055static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
5056{
5057        struct amdgpu_device *adev = kiq_ring->adev;
5058        uint32_t scratch, tmp = 0;
5059        int r, i;
5060
5061        r = amdgpu_gfx_scratch_get(adev, &scratch);
5062        if (r) {
5063                DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5064                return r;
5065        }
5066        WREG32(scratch, 0xCAFEDEAD);
5067
5068        r = amdgpu_ring_alloc(kiq_ring, 10);
5069        if (r) {
5070                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5071                amdgpu_gfx_scratch_free(adev, scratch);
5072                return r;
5073        }
5074
5075        /* unmap queues */
5076        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5077        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5078                                                PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5079                                                PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5080                                                PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5081                                                PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5082        amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5083        amdgpu_ring_write(kiq_ring, 0);
5084        amdgpu_ring_write(kiq_ring, 0);
5085        amdgpu_ring_write(kiq_ring, 0);
5086        /* write to scratch for completion */
5087        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5088        amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5089        amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5090        amdgpu_ring_commit(kiq_ring);
5091
5092        for (i = 0; i < adev->usec_timeout; i++) {
5093                tmp = RREG32(scratch);
5094                if (tmp == 0xDEADBEEF)
5095                        break;
5096                DRM_UDELAY(1);
5097        }
5098        if (i >= adev->usec_timeout) {
5099                DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5100                r = -EINVAL;
5101        }
5102        amdgpu_gfx_scratch_free(adev, scratch);
5103        return r;
5104}
5105
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback for GFX v8
 *
 * Masks privileged-access interrupts, unmaps every KCQ through the KIQ
 * (so the CPC stops touching memory that is about to go away), then for
 * bare-metal stops the CP engines and the RLC and ungates GFX power.
 * Under SR-IOV the host owns engine teardown, so we return early.
 *
 * Returns 0.
 */
5106static int gfx_v8_0_hw_fini(void *handle)
5107{
5108        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5109        int i;
5110
5111        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5112        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5113
5114        /* disable KCQ to avoid CPC touch memory not valid anymore */
5115        for (i = 0; i < adev->gfx.num_compute_rings; i++)
5116                gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5117
5118        if (amdgpu_sriov_vf(adev)) {
5119                pr_debug("For SRIOV client, shouldn't do anything.\n");
5120                return 0;
5121        }
5122        gfx_v8_0_cp_enable(adev, false);
5123        gfx_v8_0_rlc_stop(adev);
5124
5125        amdgpu_device_ip_set_powergating_state(adev,
5126                                               AMD_IP_BLOCK_TYPE_GFX,
5127                                               AMD_PG_STATE_UNGATE);
5128
5129        return 0;
5130}
5131
5132static int gfx_v8_0_suspend(void *handle)
5133{
5134        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5135        adev->gfx.in_suspend = true;
5136        return gfx_v8_0_hw_fini(adev);
5137}
5138
5139static int gfx_v8_0_resume(void *handle)
5140{
5141        int r;
5142        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5143
5144        r = gfx_v8_0_hw_init(adev);
5145        adev->gfx.in_suspend = false;
5146        return r;
5147}
5148
5149static bool gfx_v8_0_is_idle(void *handle)
5150{
5151        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5152
5153        if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5154                return false;
5155        else
5156                return true;
5157}
5158
5159static int gfx_v8_0_wait_for_idle(void *handle)
5160{
5161        unsigned i;
5162        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5163
5164        for (i = 0; i < adev->usec_timeout; i++) {
5165                if (gfx_v8_0_is_idle(handle))
5166                        return 0;
5167
5168                udelay(1);
5169        }
5170        return -ETIMEDOUT;
5171}
5172
/*
 * gfx_v8_0_check_soft_reset - decide whether the GFX block needs a soft reset
 *
 * Reads GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and accumulates the
 * GRBM/SRBM soft-reset bits corresponding to any busy engine.  The result
 * is cached in adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post
 * reset callbacks to consume.
 *
 * Returns true if a reset is required, false otherwise.
 */
5173static bool gfx_v8_0_check_soft_reset(void *handle)
5174{
5175        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5176        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5177        u32 tmp;
5178
5179        /* GRBM_STATUS */
5180        tmp = RREG32(mmGRBM_STATUS);
        /* any busy graphics-pipeline unit -> reset CP + GFX (and GRBM) */
5181        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5182                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5183                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5184                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5185                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5186                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5187                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5188                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5189                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5190                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5191                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5192                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5193                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5194        }
5195
5196        /* GRBM_STATUS2 */
5197        tmp = RREG32(mmGRBM_STATUS2);
5198        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5199                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5200                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5201
        /* any busy CP micro-engine (fetcher/compute/gfx) -> reset them all */
5202        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5203            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5204            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5205                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5206                                                SOFT_RESET_CPF, 1);
5207                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5208                                                SOFT_RESET_CPC, 1);
5209                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5210                                                SOFT_RESET_CPG, 1);
5211                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5212                                                SOFT_RESET_GRBM, 1);
5213        }
5214
5215        /* SRBM_STATUS */
5216        tmp = RREG32(mmSRBM_STATUS);
5217        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5218                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5219                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5220        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5221                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5222                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5223
        /* publish (or clear) the cached reset masks for the reset callbacks */
5224        if (grbm_soft_reset || srbm_soft_reset) {
5225                adev->gfx.grbm_soft_reset = grbm_soft_reset;
5226                adev->gfx.srbm_soft_reset = srbm_soft_reset;
5227                return true;
5228        } else {
5229                adev->gfx.grbm_soft_reset = 0;
5230                adev->gfx.srbm_soft_reset = 0;
5231                return false;
5232        }
5233}
5234
5235static int gfx_v8_0_pre_soft_reset(void *handle)
5236{
5237        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5238        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5239
5240        if ((!adev->gfx.grbm_soft_reset) &&
5241            (!adev->gfx.srbm_soft_reset))
5242                return 0;
5243
5244        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5245        srbm_soft_reset = adev->gfx.srbm_soft_reset;
5246
5247        /* stop the rlc */
5248        gfx_v8_0_rlc_stop(adev);
5249
5250        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5251            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5252                /* Disable GFX parsing/prefetching */
5253                gfx_v8_0_cp_gfx_enable(adev, false);
5254
5255        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5256            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5257            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5258            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5259                int i;
5260
5261                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5262                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5263
5264                        mutex_lock(&adev->srbm_mutex);
5265                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5266                        gfx_v8_0_deactivate_hqd(adev, 2);
5267                        vi_srbm_select(adev, 0, 0, 0, 0);
5268                        mutex_unlock(&adev->srbm_mutex);
5269                }
5270                /* Disable MEC parsing/prefetching */
5271                gfx_v8_0_cp_compute_enable(adev, false);
5272        }
5273
5274       return 0;
5275}
5276
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset bits for GFX
 *
 * Uses the reset masks latched by gfx_v8_0_check_soft_reset().  Sequence:
 * stall/clear GFX through GMCON_DEBUG, assert then deassert the GRBM and
 * SRBM reset bits (reading back after each write to post it), and finally
 * release the GMCON stall.  The 50us delays give the hardware time to
 * settle between steps.
 *
 * Returns 0 (also when no reset is pending).
 */
5277static int gfx_v8_0_soft_reset(void *handle)
5278{
5279        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5280        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5281        u32 tmp;
5282
5283        if ((!adev->gfx.grbm_soft_reset) &&
5284            (!adev->gfx.srbm_soft_reset))
5285                return 0;
5286
5287        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5288        srbm_soft_reset = adev->gfx.srbm_soft_reset;
5289
        /* stall memory-controller traffic to GFX while resetting */
5290        if (grbm_soft_reset || srbm_soft_reset) {
5291                tmp = RREG32(mmGMCON_DEBUG);
5292                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5293                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5294                WREG32(mmGMCON_DEBUG, tmp);
5295                udelay(50);
5296        }
5297
5298        if (grbm_soft_reset) {
5299                tmp = RREG32(mmGRBM_SOFT_RESET);
5300                tmp |= grbm_soft_reset;
5301                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5302                WREG32(mmGRBM_SOFT_RESET, tmp);
                /* read back to make sure the write has landed */
5303                tmp = RREG32(mmGRBM_SOFT_RESET);
5304
5305                udelay(50);
5306
5307                tmp &= ~grbm_soft_reset;
5308                WREG32(mmGRBM_SOFT_RESET, tmp);
5309                tmp = RREG32(mmGRBM_SOFT_RESET);
5310        }
5311
5312        if (srbm_soft_reset) {
5313                tmp = RREG32(mmSRBM_SOFT_RESET);
5314                tmp |= srbm_soft_reset;
5315                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5316                WREG32(mmSRBM_SOFT_RESET, tmp);
5317                tmp = RREG32(mmSRBM_SOFT_RESET);
5318
5319                udelay(50);
5320
5321                tmp &= ~srbm_soft_reset;
5322                WREG32(mmSRBM_SOFT_RESET, tmp);
5323                tmp = RREG32(mmSRBM_SOFT_RESET);
5324        }
5325
        /* release the GMCON stall set before the reset */
5326        if (grbm_soft_reset || srbm_soft_reset) {
5327                tmp = RREG32(mmGMCON_DEBUG);
5328                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5329                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5330                WREG32(mmGMCON_DEBUG, tmp);
5331        }
5332
5333        /* Wait a little for things to settle down */
5334        udelay(50);
5335
5336        return 0;
5337}
5338
/*
 * gfx_v8_0_post_soft_reset - restart GFX/compute after a soft reset
 *
 * Mirror of gfx_v8_0_pre_soft_reset(): for the CP/GFX bits resume the
 * GFX ring; for any CP reset deactivate each compute HQD again and run
 * the full KIQ resume to re-map the compute queues; finally restart the
 * RLC that pre_soft_reset stopped.
 *
 * Returns 0 (also when no reset was pending).
 */
5339static int gfx_v8_0_post_soft_reset(void *handle)
5340{
5341        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5342        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5343
5344        if ((!adev->gfx.grbm_soft_reset) &&
5345            (!adev->gfx.srbm_soft_reset))
5346                return 0;
5347
5348        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5349        srbm_soft_reset = adev->gfx.srbm_soft_reset;
5350
5351        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5352            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5353                gfx_v8_0_cp_gfx_resume(adev);
5354
5355        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5356            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5357            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5358            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5359                int i;
5360
5361                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5362                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5363
                        /* HQD registers are per-queue; select under srbm_mutex */
5364                        mutex_lock(&adev->srbm_mutex);
5365                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5366                        gfx_v8_0_deactivate_hqd(adev, 2);
5367                        vi_srbm_select(adev, 0, 0, 0, 0);
5368                        mutex_unlock(&adev->srbm_mutex);
5369                }
5370                gfx_v8_0_kiq_resume(adev);
5371        }
5372        gfx_v8_0_rlc_start(adev);
5373
5374        return 0;
5375}
5376
5377/**
5378 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5379 *
5380 * @adev: amdgpu_device pointer
5381 *
5382 * Fetches a GPU clock counter snapshot.
5383 * Returns the 64 bit clock counter snapshot.
5384 */
5385static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5386{
5387        uint64_t clock;
5388
5389        mutex_lock(&adev->gfx.gpu_clock_mutex);
5390        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5391        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5392                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5393        mutex_unlock(&adev->gfx.gpu_clock_mutex);
5394        return clock;
5395}
5396
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA partition setup for a vmid
 *
 * Emits four WRITE_DATA packets that program the per-vmid GDS memory
 * base/size, the GWS base/size, and the OA allocation mask.  Byte values
 * are converted to hardware units with the AMDGPU_*_SHIFT constants
 * before being written.  Emits a fixed 20 dwords; callers must have
 * reserved ring space accordingly.
 */
5397static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5398                                          uint32_t vmid,
5399                                          uint32_t gds_base, uint32_t gds_size,
5400                                          uint32_t gws_base, uint32_t gws_size,
5401                                          uint32_t oa_base, uint32_t oa_size)
5402{
        /* convert byte quantities to the hardware's allocation granularity */
5403        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5404        gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5405
5406        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5407        gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5408
5409        oa_base = oa_base >> AMDGPU_OA_SHIFT;
5410        oa_size = oa_size >> AMDGPU_OA_SHIFT;
5411
5412        /* GDS Base */
5413        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5414        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5415                                WRITE_DATA_DST_SEL(0)));
5416        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5417        amdgpu_ring_write(ring, 0);
5418        amdgpu_ring_write(ring, gds_base);
5419
5420        /* GDS Size */
5421        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5422        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5423                                WRITE_DATA_DST_SEL(0)));
5424        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5425        amdgpu_ring_write(ring, 0);
5426        amdgpu_ring_write(ring, gds_size);
5427
5428        /* GWS */
5429        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5430        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5431                                WRITE_DATA_DST_SEL(0)));
5432        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5433        amdgpu_ring_write(ring, 0);
        /* size and base are packed into one register dword */
5434        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5435
5436        /* OA */
5437        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5438        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5439                                WRITE_DATA_DST_SEL(0)));
5440        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5441        amdgpu_ring_write(ring, 0);
        /* contiguous run of oa_size bits starting at bit oa_base */
5442        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5443}
5444
5445static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5446{
5447        WREG32(mmSQ_IND_INDEX,
5448                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5449                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5450                (address << SQ_IND_INDEX__INDEX__SHIFT) |
5451                (SQ_IND_INDEX__FORCE_READ_MASK));
5452        return RREG32(mmSQ_IND_DATA);
5453}
5454
5455static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5456                           uint32_t wave, uint32_t thread,
5457                           uint32_t regno, uint32_t num, uint32_t *out)
5458{
5459        WREG32(mmSQ_IND_INDEX,
5460                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5461                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5462                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5463                (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5464                (SQ_IND_INDEX__FORCE_READ_MASK) |
5465                (SQ_IND_INDEX__AUTO_INCR_MASK));
5466        while (num--)
5467                *(out++) = RREG32(mmSQ_IND_DATA);
5468}
5469
/*
 * gfx_v8_0_read_wave_data - capture a wave's status registers for debugfs
 *
 * Fills @dst with a "type 0" wave dump: a leading 0 marker followed by
 * the SQ_WAVE_* registers read through the SQ indirect interface.
 * @no_fields is advanced by the number of dwords written (19 here);
 * callers size @dst accordingly.
 */
5470static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5471{
5472        /* type 0 wave data */
5473        dst[(*no_fields)++] = 0;
5474        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5475        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5476        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5477        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5478        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5479        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5480        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5481        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5482        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5483        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5484        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5485        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5486        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5487        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5488        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5489        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5490        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5491        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5492}
5493
5494static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5495                                     uint32_t wave, uint32_t start,
5496                                     uint32_t size, uint32_t *dst)
5497{
5498        wave_read_regs(
5499                adev, simd, wave, 0,
5500                start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5501}
5502
5503
/* GFX debug/utility callbacks installed into adev->gfx.funcs at early_init */
5504static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5505        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5506        .select_se_sh = &gfx_v8_0_select_se_sh,
5507        .read_wave_data = &gfx_v8_0_read_wave_data,
5508        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5509        .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5510};
5511
5512static int gfx_v8_0_early_init(void *handle)
5513{
5514        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5515
5516        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5517        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5518        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5519        gfx_v8_0_set_ring_funcs(adev);
5520        gfx_v8_0_set_irq_funcs(adev);
5521        gfx_v8_0_set_gds_init(adev);
5522        gfx_v8_0_set_rlc_funcs(adev);
5523
5524        return 0;
5525}
5526
/*
 * gfx_v8_0_late_init - IP-block late_init callback
 *
 * Unmasks the privileged register/instruction fault interrupts, runs the
 * EDC GPR clear workaround (deferred to late init because it submits IBs,
 * which need the IB pool), and finally gates GFX power.
 *
 * Returns 0 on success or a negative error code.
 */
5527static int gfx_v8_0_late_init(void *handle)
5528{
5529        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5530        int r;
5531
5532        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5533        if (r)
5534                return r;
5535
5536        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5537        if (r)
5538                return r;
5539
5540        /* requires IBs so do in late init after IB pool is initialized */
5541        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5542        if (r)
5543                return r;
5544
5545        amdgpu_device_ip_set_powergating_state(adev,
5546                                               AMD_IP_BLOCK_TYPE_GFX,
5547                                               AMD_PG_STATE_GATE);
5548
5549        return 0;
5550}
5551
/*
 * Enable/disable static per-CU GFX powergating.  On Polaris11/12 and
 * VegaM the SMU must be informed first (message sent via the powerplay
 * SMC powergating call); every ASIC then has STATIC_PER_CU_PG_ENABLE
 * programmed directly in RLC_PG_CNTL.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12) ||
	    (adev->asic_type == CHIP_VEGAM))
		/* Send msg to SMU via Powerplay */
		amdgpu_device_ip_set_powergating_state(adev,
						       AMD_IP_BLOCK_TYPE_SMC,
						       enable ?
						       AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5566
/* Enable/disable dynamic per-CU GFX powergating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5572
/* Enable/disable "quick" medium-grain powergating mode (Polaris family). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5578
/* Toggle coarse-grain GFX powergating in RLC_PG_CNTL (Carrizo/Stoney path). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5584
/*
 * Toggle GFX pipeline powergating.  When disabling, a dummy GFX register
 * read is issued to bring the block out of powergate before any further
 * programming touches it.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5594
5595static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5596                                          bool enable)
5597{
5598        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5599                cz_enable_gfx_cg_power_gating(adev, true);
5600                if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5601                        cz_enable_gfx_pipeline_power_gating(adev, true);
5602        } else {
5603                cz_enable_gfx_cg_power_gating(adev, false);
5604                cz_enable_gfx_pipeline_power_gating(adev, false);
5605        }
5606}
5607
5608static int gfx_v8_0_set_powergating_state(void *handle,
5609                                          enum amd_powergating_state state)
5610{
5611        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5612        bool enable = (state == AMD_PG_STATE_GATE);
5613
5614        if (amdgpu_sriov_vf(adev))
5615                return 0;
5616
5617        switch (adev->asic_type) {
5618        case CHIP_CARRIZO:
5619        case CHIP_STONEY:
5620
5621                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5622                        cz_enable_sck_slow_down_on_power_up(adev, true);
5623                        cz_enable_sck_slow_down_on_power_down(adev, true);
5624                } else {
5625                        cz_enable_sck_slow_down_on_power_up(adev, false);
5626                        cz_enable_sck_slow_down_on_power_down(adev, false);
5627                }
5628                if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5629                        cz_enable_cp_power_gating(adev, true);
5630                else
5631                        cz_enable_cp_power_gating(adev, false);
5632
5633                cz_update_gfx_cg_power_gating(adev, enable);
5634
5635                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5636                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5637                else
5638                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5639
5640                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5641                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5642                else
5643                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5644                break;
5645        case CHIP_POLARIS11:
5646        case CHIP_POLARIS12:
5647        case CHIP_VEGAM:
5648                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5649                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5650                else
5651                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5652
5653                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5654                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5655                else
5656                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5657
5658                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5659                        polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5660                else
5661                        polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5662                break;
5663        default:
5664                break;
5665        }
5666
5667        return 0;
5668}
5669
/**
 * gfx_v8_0_get_clockgating_state - report active GFX clockgating features
 * @handle: amdgpu_device pointer
 * @flags: AMD_CG_SUPPORT_* bits are OR'ed in for every feature found enabled
 *
 * Derives the state from live register reads of the RLC/CGTS/CP gating
 * controls.  Under SR-IOV @flags is first cleared -- NOTE(review): the
 * register reads below still execute in that case; an early return may
 * have been intended, confirm VF register access is safe here.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5711
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC serdes
 * @reg_addr: value for the REG_ADDR field of RLC_SERDES_WR_CTRL
 * @cmd: value for the BPM_DATA field (e.g. SET/CLE_BPM_SERDES_CMD)
 *
 * Targets every SE/SH and all CU and non-CU masters, then issues the
 * command through RLC_SERDES_WR_CTRL.  NOTE(review): on Stoney the
 * BPM_DATA/REG_ADDR fields are not cleared before the OR below --
 * presumably those bits are known-zero there, but confirm before reuse.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5752
/* RLC safe-mode message values and RLC_GPR_REG2 field layout.  The
 * RLC_GPR_REG2 definitions do not appear to be referenced in this file's
 * visible code (the iceland helpers below use mmRLC_SAFE_MODE instead) --
 * kept for reference.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5759
/*
 * iceland_enter_rlc_safe_mode - ask the RLC to quiesce GFX ("safe mode")
 *
 * No-op when the RLC firmware is not running, or when neither CGCG nor
 * MGCG is enabled (safe mode is only required while reprogramming
 * clockgating).  Writes the enter message (MESSAGE=1) with CMD set to
 * RLC_SAFE_MODE, then polls -- bounded by usec_timeout -- first for GFX
 * clocks and power to report active in RLC_GPM_STAT, then for the RLC to
 * acknowledge by clearing the CMD bit.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks/power to be reported up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack (CMD self-clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5793
/*
 * iceland_exit_rlc_safe_mode - release the RLC from safe mode
 *
 * Mirror of iceland_enter_rlc_safe_mode: no-op when the RLC is not
 * running.  Sends the exit message (MESSAGE field cleared, i.e. 0) with
 * CMD set, but only if safe mode was actually entered.  The final poll
 * for CMD to clear runs unconditionally.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack (CMD self-clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5818
/* RLC safe-mode hooks, taken around clock/power gating reprogramming. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5823
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS and CGTS
 * @adev: amdgpu device
 * @enable: true to enable gating (subject to adev->cg_flags), false to force off
 *
 * Runs entirely under RLC safe mode.  The numbered steps follow the
 * required ordering, with serdes broadcasts and idle waits so every
 * CU/non-CU master observes each change before the next.  The disable
 * path performs the mirror sequence.  NOTE(review): on APUs the GRBM
 * override bit is left set on the enable path -- confirm the asymmetry
 * vs. the dGPU mask list is intended before changing it.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5927
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device
 * @enable: true to enable gating (subject to adev->cg_flags), false to force off
 *
 * Runs under RLC safe mode.  Enable path: clear the CGCG (and, when
 * supported, CGLS) override bits, push the change through the serdes,
 * then set the enable bits in RLC_CGCG_CGLS_CTRL and re-arm the GUI idle
 * interrupts.  Disable path mirrors this, including dummy register reads
 * to wake CGCG before programming.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6020static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6021                                            bool enable)
6022{
6023        if (enable) {
6024                /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6025                 * ===  MGCG + MGLS + TS(CG/LS) ===
6026                 */
6027                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6028                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6029        } else {
6030                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6031                 * ===  CGCG + CGLS ===
6032                 */
6033                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6034                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6035        }
6036        return 0;
6037}
6038
6039static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6040                                          enum amd_clockgating_state state)
6041{
6042        uint32_t msg_id, pp_state = 0;
6043        uint32_t pp_support_state = 0;
6044
6045        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6046                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6047                        pp_support_state = PP_STATE_SUPPORT_LS;
6048                        pp_state = PP_STATE_LS;
6049                }
6050                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6051                        pp_support_state |= PP_STATE_SUPPORT_CG;
6052                        pp_state |= PP_STATE_CG;
6053                }
6054                if (state == AMD_CG_STATE_UNGATE)
6055                        pp_state = 0;
6056
6057                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6058                                PP_BLOCK_GFX_CG,
6059                                pp_support_state,
6060                                pp_state);
6061                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6062                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6063        }
6064
6065        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6066                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6067                        pp_support_state = PP_STATE_SUPPORT_LS;
6068                        pp_state = PP_STATE_LS;
6069                }
6070
6071                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6072                        pp_support_state |= PP_STATE_SUPPORT_CG;
6073                        pp_state |= PP_STATE_CG;
6074                }
6075
6076                if (state == AMD_CG_STATE_UNGATE)
6077                        pp_state = 0;
6078
6079                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6080                                PP_BLOCK_GFX_MG,
6081                                pp_support_state,
6082                                pp_state);
6083                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6084                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6085        }
6086
6087        return 0;
6088}
6089
6090static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6091                                          enum amd_clockgating_state state)
6092{
6093
6094        uint32_t msg_id, pp_state = 0;
6095        uint32_t pp_support_state = 0;
6096
6097        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6098                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6099                        pp_support_state = PP_STATE_SUPPORT_LS;
6100                        pp_state = PP_STATE_LS;
6101                }
6102                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6103                        pp_support_state |= PP_STATE_SUPPORT_CG;
6104                        pp_state |= PP_STATE_CG;
6105                }
6106                if (state == AMD_CG_STATE_UNGATE)
6107                        pp_state = 0;
6108
6109                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6110                                PP_BLOCK_GFX_CG,
6111                                pp_support_state,
6112                                pp_state);
6113                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6114                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6115        }
6116
6117        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6118                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6119                        pp_support_state = PP_STATE_SUPPORT_LS;
6120                        pp_state = PP_STATE_LS;
6121                }
6122                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6123                        pp_support_state |= PP_STATE_SUPPORT_CG;
6124                        pp_state |= PP_STATE_CG;
6125                }
6126                if (state == AMD_CG_STATE_UNGATE)
6127                        pp_state = 0;
6128
6129                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6130                                PP_BLOCK_GFX_3D,
6131                                pp_support_state,
6132                                pp_state);
6133                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6134                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6135        }
6136
6137        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6138                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6139                        pp_support_state = PP_STATE_SUPPORT_LS;
6140                        pp_state = PP_STATE_LS;
6141                }
6142
6143                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6144                        pp_support_state |= PP_STATE_SUPPORT_CG;
6145                        pp_state |= PP_STATE_CG;
6146                }
6147
6148                if (state == AMD_CG_STATE_UNGATE)
6149                        pp_state = 0;
6150
6151                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6152                                PP_BLOCK_GFX_MG,
6153                                pp_support_state,
6154                                pp_state);
6155                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6156                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6157        }
6158
6159        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6160                pp_support_state = PP_STATE_SUPPORT_LS;
6161
6162                if (state == AMD_CG_STATE_UNGATE)
6163                        pp_state = 0;
6164                else
6165                        pp_state = PP_STATE_LS;
6166
6167                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6168                                PP_BLOCK_GFX_RLC,
6169                                pp_support_state,
6170                                pp_state);
6171                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6172                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6173        }
6174
6175        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6176                pp_support_state = PP_STATE_SUPPORT_LS;
6177
6178                if (state == AMD_CG_STATE_UNGATE)
6179                        pp_state = 0;
6180                else
6181                        pp_state = PP_STATE_LS;
6182                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6183                        PP_BLOCK_GFX_CP,
6184                        pp_support_state,
6185                        pp_state);
6186                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6187                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6188        }
6189
6190        return 0;
6191}
6192
/**
 * gfx_v8_0_set_clockgating_state - per-ASIC GFX clockgating dispatch
 * @handle: amdgpu_device pointer
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Fiji/Carrizo/Stoney program the RLC/CGTS registers directly; Tonga and
 * the Polaris family (incl. VegaM) route the request through the SMU.
 * The call is skipped entirely under SR-IOV.  Always returns 0.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6222
/* Return the ring's current read pointer from its writeback slot. */
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
6227
6228static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6229{
6230        struct amdgpu_device *adev = ring->adev;
6231
6232        if (ring->use_doorbell)
6233                /* XXX check if swapping is necessary on BE */
6234                return ring->adev->wb.wb[ring->wptr_offs];
6235        else
6236                return RREG32(mmCP_RB0_WPTR);
6237}
6238
/*
 * Publish a new gfx ring write pointer: via writeback slot + doorbell
 * when doorbells are in use, otherwise through CP_RB0_WPTR.  The
 * discarded read-back presumably flushes the posted MMIO write --
 * NOTE(review): confirm against the bus-access rules for this ASIC.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
6252
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush into the ring
 *
 * Emits a WAIT_REG_MEM packet in write-then-wait mode: it writes
 * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the per-client
 * done bit matches.  The bit is selected by ME/pipe for compute and KIQ
 * rings (ME1 -> CP2..CP5, ME2 -> CP6..CP9) and is CP0 (PFP engine) for
 * gfx rings.  An unexpected compute ME value emits nothing.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6285
/*
 * Emit a VS_PARTIAL_FLUSH event followed by a VGT_FLUSH event so the
 * VS work in flight retires before the VGT state is flushed.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6296
/**
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 * @ring: gfx ring
 * @ib: indirect buffer to execute
 * @vmid: VMID the IB executes under (packed into bits 31:24 of control)
 * @ctx_switch: unused by this implementation
 *
 * CE IBs are emitted with INDIRECT_BUFFER_CONST, DE IBs with
 * INDIRECT_BUFFER.  Under SR-IOV, preemptible DE IBs get
 * INDIRECT_BUFFER_PRE_ENB set and their DE metadata emitted first.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* IB address is dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6326
/**
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a compute ring
 * @ring: compute (or KIQ) ring
 * @ib: indirect buffer to execute
 * @vmid: VMID the IB executes under (packed into bits 31:24 of control)
 * @ctx_switch: unused by this implementation
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC)); /* IB address is dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6342
/**
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 * @ring: gfx ring
 * @addr: GPU address to write the sequence number to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an EOP interrupt
 *
 * Emits an EVENT_WRITE_EOP packet that flushes the TCL1/TC caches and
 * writes @seq to @addr when prior work on the pipeline has retired.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6363
/**
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's last fence
 * @ring: ring to emit on
 *
 * Emits a WAIT_REG_MEM polling the ring's own fence location until it
 * equals the most recently emitted sync_seq, i.e. all previously
 * submitted work has signalled.  The gfx ring waits on the PFP engine,
 * compute rings on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6380
/**
 * gfx_v8_0_ring_emit_vm_flush - flush the GPU TLB for a VMID
 * @ring: ring to emit on
 * @vmid: VMID whose mappings are being flushed
 * @pd_addr: address of the new page directory
 *
 * Emits the generic GMC TLB-flush sequence, then a WAIT_REG_MEM read of
 * VM_INVALIDATE_REQUEST (function "always") to order against the
 * invalidate, and finally a PFP_SYNC_ME on the gfx ring so the PFP does
 * not run ahead with stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6406
6407static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6408{
6409        return ring->adev->wb.wb[ring->wptr_offs];
6410}
6411
6412static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6413{
6414        struct amdgpu_device *adev = ring->adev;
6415
6416        /* XXX check if swapping is necessary on BE */
6417        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6418        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6419}
6420
6421static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6422                                           bool acquire)
6423{
6424        struct amdgpu_device *adev = ring->adev;
6425        int pipe_num, tmp, reg;
6426        int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6427
6428        pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6429
6430        /* first me only has 2 entries, GFX and HP3D */
6431        if (ring->me > 0)
6432                pipe_num -= 2;
6433
6434        reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6435        tmp = RREG32(reg);
6436        tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6437        WREG32(reg, tmp);
6438}
6439
/**
 * gfx_v8_0_pipe_reserve_resources - track per-pipe resource reservations
 * @adev: amdgpu device
 * @ring: ring whose pipe is (un)reserving resources
 * @acquire: true to reserve, false to release
 *
 * Maintains pipe_reserve_bitmap under pipe_reserve_mutex.  When no pipe
 * holds a reservation, every gfx and compute ring is restored to full
 * pipe percentage; otherwise each ring's percentage is set according to
 * whether its pipe currently holds a reservation.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6489
6490static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6491                                      struct amdgpu_ring *ring,
6492                                      bool acquire)
6493{
6494        uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6495        uint32_t queue_priority = acquire ? 0xf : 0x0;
6496
6497        mutex_lock(&adev->srbm_mutex);
6498        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6499
6500        WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6501        WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6502
6503        vi_srbm_select(adev, 0, 0, 0, 0);
6504        mutex_unlock(&adev->srbm_mutex);
6505}
6506static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6507                                               enum drm_sched_priority priority)
6508{
6509        struct amdgpu_device *adev = ring->adev;
6510        bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6511
6512        if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6513                return;
6514
6515        gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6516        gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6517}
6518
/**
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: compute ring
 * @addr: GPU address to write the sequence number to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt
 *
 * Compute uses RELEASE_MEM instead of EVENT_WRITE_EOP, with the same
 * TCL1/TC cache-flush actions; note the dword order differs from the
 * gfx variant (data/int select precedes the address).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6539
/**
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 * @ring: KIQ ring
 * @addr: GPU address to write the sequence number to
 * @seq: sequence number to write (lower 32 bits only)
 * @flags: AMDGPU_FENCE_FLAG_INT requests a GENERIC2 interrupt;
 *         AMDGPU_FENCE_FLAG_64BIT is not supported
 *
 * The KIQ has no EOP path, so the fence is a plain WRITE_DATA of the
 * sequence number, optionally followed by a write to CPC_INT_STATUS to
 * raise the interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6564
/* Emit a SWITCH_BUFFER packet (2 dwords) on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6570
/**
 * gfx_v8_ring_emit_cntxcntl - emit CONTEXT_CONTROL on the gfx ring
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the dw2 load-control word for the CONTEXT_CONTROL packet.  On
 * a real context switch a VGT flush is emitted first and the packet
 * requests loading of global config, CS SH regs, per-context state and
 * gfx SH regs; CE RAM is loaded whenever a preamble is present.  Under
 * SR-IOV the CE metadata is emitted ahead of the packet.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6603
/**
 * gfx_v8_0_ring_emit_init_cond_exec - open a conditional execution region
 * @ring: gfx ring
 *
 * Emits a COND_EXEC packet whose dword-count slot is filled with a
 * dummy value and returns that slot's ring offset so
 * gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real count
 * once the region is complete.  The region is skipped by the CP when
 * *cond_exe_gpu_addr reads 0.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask; /* offset of the patch slot */
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6616
/**
 * gfx_v8_0_ring_emit_patch_cond_exec - close a conditional execution region
 * @ring: gfx ring
 * @offset: patch-slot offset returned by gfx_v8_0_ring_emit_init_cond_exec()
 *
 * Replaces the 0x55aa55aa placeholder with the number of dwords between
 * the patch slot and the current write pointer, accounting for the case
 * where the ring has wrapped since the slot was emitted.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the patch slot */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6630
/**
 * gfx_v8_0_ring_emit_rreg - emit a register read via COPY_DATA
 * @ring: ring to emit on (hooked up on the KIQ ring, see
 *        gfx_v8_0_ring_funcs_kiq)
 * @reg: register offset to read
 *
 * Copies the register value into the writeback slot at
 * adev->virt.reg_val_offs with write confirm, so the driver can read
 * it back from memory.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6646
6647static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6648                                  uint32_t val)
6649{
6650        uint32_t cmd;
6651
6652        switch (ring->funcs->type) {
6653        case AMDGPU_RING_TYPE_GFX:
6654                cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6655                break;
6656        case AMDGPU_RING_TYPE_KIQ:
6657                cmd = 1 << 16; /* no inc addr */
6658                break;
6659        default:
6660                cmd = WR_CONFIRM;
6661                break;
6662        }
6663
6664        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6665        amdgpu_ring_write(ring, cmd);
6666        amdgpu_ring_write(ring, reg);
6667        amdgpu_ring_write(ring, 0);
6668        amdgpu_ring_write(ring, val);
6669}
6670
/* Enable/disable the gfx ring's EOP (timestamp) interrupt. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6677
/**
 * gfx_v8_0_set_compute_eop_interrupt_state - gate a MEC pipe's EOP interrupt
 * @adev: amdgpu device
 * @me: MEC number (only 1 is handled here, see below)
 * @pipe: pipe within the MEC (0-3)
 * @state: enable or disable
 *
 * Read-modify-writes the TIME_STAMP_INT_ENABLE bit of the per-pipe
 * CP_ME1_PIPEn_INT_CNTL register.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6728
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6739
/* Enable/disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6750
/*
 * Route an EOP interrupt enable/disable request to the gfx ring or to
 * the matching MEC/pipe (requests for MEC2 are ignored by the per-pipe
 * helper, which only programs MEC1).
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6789
6790static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6791                            struct amdgpu_irq_src *source,
6792                            struct amdgpu_iv_entry *entry)
6793{
6794        int i;
6795        u8 me_id, pipe_id, queue_id;
6796        struct amdgpu_ring *ring;
6797
6798        DRM_DEBUG("IH: CP EOP\n");
6799        me_id = (entry->ring_id & 0x0c) >> 2;
6800        pipe_id = (entry->ring_id & 0x03) >> 0;
6801        queue_id = (entry->ring_id & 0x70) >> 4;
6802
6803        switch (me_id) {
6804        case 0:
6805                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6806                break;
6807        case 1:
6808        case 2:
6809                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6810                        ring = &adev->gfx.compute_ring[i];
6811                        /* Per-queue interrupt is supported for MEC starting from VI.
6812                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6813                          */
6814                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6815                                amdgpu_fence_process(ring);
6816                }
6817                break;
6818        }
6819        return 0;
6820}
6821
/* A privileged register access fault occurred: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6830
/* A privileged instruction fault occurred: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6839
/**
 * gfx_v8_0_kiq_set_interrupt_state - gate the KIQ's GENERIC2 interrupt
 * @adev: amdgpu device
 * @src: interrupt source (unused)
 * @type: must be AMDGPU_CP_KIQ_IRQ_DRIVER0; anything else BUG()s
 * @state: enable or disable
 *
 * Toggles GENERIC2_INT_ENABLE both in CPC_INT_CNTL and in the per-pipe
 * CP_ME1/ME2_PIPEn_INT_CNTL register matching the KIQ ring's placement.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6868
6869static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6870                            struct amdgpu_irq_src *source,
6871                            struct amdgpu_iv_entry *entry)
6872{
6873        u8 me_id, pipe_id, queue_id;
6874        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6875
6876        me_id = (entry->ring_id & 0x0c) >> 2;
6877        pipe_id = (entry->ring_id & 0x03) >> 0;
6878        queue_id = (entry->ring_id & 0x70) >> 4;
6879        DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6880                   me_id, pipe_id, queue_id);
6881
6882        amdgpu_fence_process(ring);
6883        return 0;
6884}
6885
/* IP block callbacks for the GFX v8 engine (init/teardown, reset, CG/PG). */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6906
/* Ring callbacks for the gfx ring (doorbell/register wptr, EOP fences). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6950
/* Ring callbacks for MEC compute rings (doorbell wptr, RELEASE_MEM fences). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6980
/* Ring callbacks for the Kernel Interface Queue (KIQ).  The KIQ reuses the
 * compute rptr/wptr handlers but has its own fence emitter and exposes
 * register read/write emitters (emit_rreg/emit_wreg); it wires up no
 * pipeline-sync or vm-flush callbacks of its own.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* Worst-case dword budget per frame, annotated per emitter. */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7006
7007static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7008{
7009        int i;
7010
7011        adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7012
7013        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7014                adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7015
7016        for (i = 0; i < adev->gfx.num_compute_rings; i++)
7017                adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7018}
7019
/* End-of-pipe interrupt source: enable/disable state and handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7024
/* Privileged-register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7029
/* Privileged-instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7034
/* KIQ interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
7039
7040static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7041{
7042        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7043        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7044
7045        adev->gfx.priv_reg_irq.num_types = 1;
7046        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7047
7048        adev->gfx.priv_inst_irq.num_types = 1;
7049        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7050
7051        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7052        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7053}
7054
/* Install the RLC helper table.  NOTE(review): the table is named
 * "iceland_rlc_funcs" but is used for all gfx v8 ASICs here — presumably a
 * historical name; confirm against its definition earlier in this file.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7059
/* Initialize the ASIC GDS (global data share) sizing info.  Total GDS
 * memory size is read from hardware; GWS and OA totals are fixed.  The
 * per-partition sizes (bytes for mem, resource counts for gws/oa) are
 * chosen based on whether the part reports a 64KB GDS.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		/* 64KB GDS parts: 4KB mem partitions for both gfx and CS. */
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		/* Smaller GDS: 1KB mem partitions, larger GWS shares. */
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7087
7088static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7089                                                 u32 bitmap)
7090{
7091        u32 data;
7092
7093        if (!bitmap)
7094                return;
7095
7096        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7097        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7098
7099        WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7100}
7101
7102static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7103{
7104        u32 data, mask;
7105
7106        data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7107                RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7108
7109        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7110
7111        return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7112}
7113
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active-CU count, the always-on (AO) CU mask, and fixed per-CU limits.
 * Walks every shader engine / shader array under grbm_idx_mutex, applying
 * any user CU-disable masks before reading back the active bitmap.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];	/* up to 4 SEs x 2 SHs */
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow all CUs in a SH. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* Fill disable_masks from the user CU-disable option (4 SEs x 2 SHs). */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Point GRBM at this SE/SH so the CU registers read
			 * per-array values. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first ao_cu_num of them are
			 * marked always-on for this array. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* Packed AO mask only covers the first 2 SEs x 2 SHs
			 * (8 bits per array). */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore broadcast (all SE/SH) selection. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7169
/* GFX 8.0 IP block descriptor, referenced from the ASIC setup tables. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7178
/* GFX 8.1 IP block descriptor; shares the 8.0 IP funcs table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7187
7188static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7189{
7190        uint64_t ce_payload_addr;
7191        int cnt_ce;
7192        union {
7193                struct vi_ce_ib_state regular;
7194                struct vi_ce_ib_state_chained_ib chained;
7195        } ce_payload = {};
7196
7197        if (ring->adev->virt.chained_ib_support) {
7198                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7199                        offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7200                cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7201        } else {
7202                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7203                        offsetof(struct vi_gfx_meta_data, ce_payload);
7204                cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7205        }
7206
7207        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7208        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7209                                WRITE_DATA_DST_SEL(8) |
7210                                WR_CONFIRM) |
7211                                WRITE_DATA_CACHE_POLICY(0));
7212        amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7213        amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7214        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7215}
7216
/* Emit the DE metadata payload into the CSA via a DE-engine WRITE_DATA
 * packet.  Unlike the CE payload, the DE payload carries the GDS backup
 * address (placed 4KB past the CSA base).  Payload layout and dword count
 * depend on chained-IB support under virtualization.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	/* GDS backup area lives one page past the CSA base. */
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		/* +4 header/address dwords, -2 because PACKET3 counts are len-2. */
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
7249