linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "vi.h"
  33#include "vi_structs.h"
  34#include "vid.h"
  35#include "amdgpu_ucode.h"
  36#include "amdgpu_atombios.h"
  37#include "atombios_i2c.h"
  38#include "clearstate_vi.h"
  39
  40#include "gmc/gmc_8_2_d.h"
  41#include "gmc/gmc_8_2_sh_mask.h"
  42
  43#include "oss/oss_3_0_d.h"
  44#include "oss/oss_3_0_sh_mask.h"
  45
  46#include "bif/bif_5_0_d.h"
  47#include "bif/bif_5_0_sh_mask.h"
  48#include "gca/gfx_8_0_d.h"
  49#include "gca/gfx_8_0_enum.h"
  50#include "gca/gfx_8_0_sh_mask.h"
  51
  52#include "dce/dce_10_0_d.h"
  53#include "dce/dce_10_0_sh_mask.h"
  54
  55#include "smu/smu_7_1_3_d.h"
  56
  57#include "ivsrcid/ivsrcid_vislands30.h"
  58
  /*
   * GFX8 sizing constants: one GFX ring, and a MEC HPD size of 4096.
   * NOTE(review): the HPD size unit is presumably bytes - confirm at the
   * site that allocates the buffer.
   */
  59#define GFX8_NUM_GFX_RINGS     1
  60#define GFX8_MEC_HPD_SIZE 4096
  61
  /*
   * Per-ASIC "golden" GB_ADDR_CONFIG constants (ASIC taken from the macro
   * name). Topaz and Carrizo share 0x22010001; Polaris11 and Tonga differ
   * only in the low digit (2 vs 3).
   */
  62#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  63#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  64#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  65#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  66
  /*
   * Field-placement helpers for the tiling registers: each macro shifts its
   * argument left by the matching <REG>__<FIELD>__SHIFT constant of
   * GB_TILE_MODE0 (first five) or GB_MACROTILE_MODE0 (last four).
   * No masking is applied, so the caller must pass a value that fits the field.
   */
  67#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  68#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  69#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  70#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  71#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  72#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  73#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  74#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  75#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  76
  /*
   * Single-bit masks for the RLC_CGTT_MGCG_OVERRIDE register, occupying
   * bits 0 through 5 in the order CPF, RLC, MGCG, CGCG, CGLS, GRBM.
   */
  77#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  78#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  79#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  80#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  81#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  82#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  83
  84/* BPM SERDES command values: 1 = set, 0 = "CLE" (presumably "clear" - confirm against the users of these macros) */
  85#define SET_BPM_SERDES_CMD    1
  86#define CLE_BPM_SERDES_CMD    0
  87
  88/* BPM register addresses: consecutive indices starting at 0; BPM_REG_FGCG_MAX (5) is the number of entries before it */
  89enum {
  90        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  91        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  92        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  93        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  94        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  95        BPM_REG_FGCG_MAX
  96};
  97
  /*
   * NOTE(review): presumably the length (in entries) of the "direct register
   * list" section of the RLC firmware format - confirm where this is used.
   */
  98#define RLC_FormatDirectRegListLength        14
  99
 /*
  * Firmware file names declared with MODULE_FIRMWARE(), grouped by ASIC:
  * carrizo, stoney, tonga, topaz, fiji, polaris10/11/12 and vegam.
  * Each group lists ce, pfp, me, mec and rlc images; all groups except
  * stoney and topaz also list mec2. The three polaris groups additionally
  * list a "_2" variant of every image except rlc.
  */
 100MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
 101MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
 102MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
 103MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
 104MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 105MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 106
 107MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 108MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 109MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 110MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 111MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 112
 113MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 114MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 115MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 116MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 117MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 118MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 119
 120MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 121MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 122MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 123MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 124MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 125
 126MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 127MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 128MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 129MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 130MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 131MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 132
 133MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 134MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 135MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 141MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 142MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 143MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 144
 145MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 146MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 147MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 148MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 149MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 150MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 151MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 152MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 153MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 154MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 155MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 156
 157MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 158MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 159MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 160MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 161MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 162MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 163MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 164MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 165MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 166MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 167MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 168
 169MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
 170MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
 171MODULE_FIRMWARE("amdgpu/vegam_me.bin");
 172MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
 173MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
 174MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 175
 /*
  * Per-VMID GDS register table: 16 rows, one for each of VMID0..VMID15.
  * Each row lists, in order, that VMID's GDS BASE, GDS SIZE, GWS and OA
  * register identifiers.
  */
 176static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 177{
 178        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 179        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 180        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 181        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 182        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 183        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 184        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 185        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 186        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 187        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 188        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 189        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 190        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 191        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 192        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 193        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 194};
 195
 /*
  * Tonga "a11" golden register settings.
  * Flat u32 table, three values per row: an mm* register identifier
  * followed by two 32-bit constants.
  * NOTE(review): the two constants are presumably an AND mask and the value
  * applied under that mask - confirm against the code consuming this table.
  */
 196static const u32 golden_settings_tonga_a11[] =
 197{
 198        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 199        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 200        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 201        mmGB_GPU_ID, 0x0000000f, 0x00000000,
 202        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 203        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 204        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 205        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 206        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 207        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 208        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 209        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 210        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 211        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 212        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 213        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 214};
 215
 /*
  * Tonga common golden settings: GRBM_GFX_INDEX, raster config,
  * GB_ADDR_CONFIG and SPI resource-reserve registers.
  * Three u32 values per row (register identifier, then two constants);
  * the middle value is 0xffffffff on every row.
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 216static const u32 tonga_golden_common_all[] =
 217{
 218        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 219        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 220        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 221        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 222        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 223        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 224        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 225        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 226};
 227
 /*
  * Tonga MGCG/CGCG init table.
  * Three u32 values per row: an mm* register identifier and two constants.
  * Layout as written: RLC_CGTT_MGCG_OVERRIDE first, then a GRBM_GFX_INDEX
  * row followed by the CGTT_*/..._CGTT_* clock-control registers, then a
  * second GRBM_GFX_INDEX row followed by five CGTS rows for each of
  * CU0..CU7, and finally CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL,
  * RLC_CGCG_CGLS_CTRL and CP_MEM_SLP_CNTL.
  * The middle value is 0xffffffff everywhere except the last row.
  * NOTE(review): middle/last values are presumably mask/value, and row
  * order is presumably significant (GRBM_GFX_INDEX rows precede each
  * group) - confirm against the consumer of this table.
  */
 228static const u32 tonga_mgcg_cgcg_init[] =
 229{
 230        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 231        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 232        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 233        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 234        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 235        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 236        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 237        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 238        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 239        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 240        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 241        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 242        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 243        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 244        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 245        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 246        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 247        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 248        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 249        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 250        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 251        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 252        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 253        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 254        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 255        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 256        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 257        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 258        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 259        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 260        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 261        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 262        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 263        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 264        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 265        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 266        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 267        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 268        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 269        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 270        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 271        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 272        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 273        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 274        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 275        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 276        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 277        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 278        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 279        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 280        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 281        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 282        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 283        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 284        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 285        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 286        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 287        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 288        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 289        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 290        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 291        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 292        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 293        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 294        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 295        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 296        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 297        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 298        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 299        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 300        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 301        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 302        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 303        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 304        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 305};
 306
 /*
  * VegaM "a11" golden register settings.
  * Flat u32 table, three values per row: an mm* register identifier
  * followed by two 32-bit constants.
  * NOTE(review): the two constants are presumably an AND mask and the value
  * applied under that mask - confirm against the code consuming this table.
  */
 307static const u32 golden_settings_vegam_a11[] =
 308{
 309        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 310        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
 311        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 312        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 313        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 314        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 315        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
 316        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
 317        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 318        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 319        mmSQ_CONFIG, 0x07f80000, 0x01180000,
 320        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 321        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 322        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 323        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 324        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
 325        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 326};
 327
 /*
  * VegaM common golden settings: GRBM_GFX_INDEX, GB_ADDR_CONFIG and the
  * SPI resource-reserve registers (no PA_SC_RASTER_CONFIG rows here).
  * Three u32 values per row; the middle value is 0xffffffff on every row.
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 328static const u32 vegam_golden_common_all[] =
 329{
 330        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 331        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 332        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 333        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 334        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 335        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 336};
 337
 /*
  * Polaris11 "a11" golden register settings.
  * Flat u32 table, three values per row: an mm* register identifier
  * followed by two 32-bit constants.
  * NOTE(review): the two constants are presumably an AND mask and the value
  * applied under that mask - confirm against the code consuming this table.
  */
 338static const u32 golden_settings_polaris11_a11[] =
 339{
 340        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 341        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 342        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 343        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 344        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 345        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 346        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 347        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 348        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 349        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 350        mmSQ_CONFIG, 0x07f80000, 0x01180000,
 351        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 352        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 353        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 354        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 355        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 356        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 357};
 358
 /*
  * Polaris11 common golden settings: GRBM_GFX_INDEX, GB_ADDR_CONFIG and the
  * SPI resource-reserve registers (no PA_SC_RASTER_CONFIG rows here).
  * The GB_ADDR_CONFIG constant equals POLARIS11_GB_ADDR_CONFIG_GOLDEN
  * (0x22011002). Three u32 values per row; the middle value is 0xffffffff
  * on every row.
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 359static const u32 polaris11_golden_common_all[] =
 360{
 361        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 362        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 363        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 364        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 365        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 366        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 367};
 368
 /*
  * Polaris10 "a11" golden register settings.
  * Flat u32 table, three values per row: an mm* register identifier
  * followed by two 32-bit constants. Unlike the Polaris11 and VegaM tables
  * it starts with an ATC_MISC_CG row and has no TCP_CHAN_STEER_LO row.
  * NOTE(review): the two constants are presumably an AND mask and the value
  * applied under that mask - confirm against the code consuming this table.
  */
 369static const u32 golden_settings_polaris10_a11[] =
 370{
 371        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 372        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 373        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 374        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 375        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 376        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 377        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 378        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 379        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 380        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 381        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 382        mmSQ_CONFIG, 0x07f80000, 0x07180000,
 383        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 384        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 385        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 386        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 387        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 388};
 389
 /*
  * Polaris10 common golden settings: GRBM_GFX_INDEX, raster config,
  * GB_ADDR_CONFIG and SPI resource-reserve registers.
  * Three u32 values per row; the middle value is 0xffffffff on every row.
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 390static const u32 polaris10_golden_common_all[] =
 391{
 392        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 393        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 394        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 395        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 396        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 397        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 398        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 399        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 400};
 401
 /*
  * Fiji common golden settings: GRBM_GFX_INDEX, raster config,
  * GB_ADDR_CONFIG and SPI resource-reserve registers, followed by a second
  * GRBM_GFX_INDEX row and an SPI_CONFIG_CNTL_1 row.
  * Three u32 values per row; the middle value is 0xffffffff on every row
  * except the last (0x0000000f).
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 402static const u32 fiji_golden_common_all[] =
 403{
 404        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 405        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 406        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 407        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 408        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 409        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 410        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 411        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 412        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 413        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 414};
 415
 /*
  * Fiji "a10" golden register settings.
  * Flat u32 table, three values per row: an mm* register identifier
  * followed by two 32-bit constants.
  * NOTE(review): the two constants are presumably an AND mask and the value
  * applied under that mask - confirm against the code consuming this table.
  */
 416static const u32 golden_settings_fiji_a10[] =
 417{
 418        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 419        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 420        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 421        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 422        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 423        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 424        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 425        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 426        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 427        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 428        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 429};
 430
 /*
  * Fiji MGCG/CGCG init table.
  * Three u32 values per row: an mm* register identifier and two constants.
  * Layout as written: RLC_CGTT_MGCG_OVERRIDE first, then a GRBM_GFX_INDEX
  * row followed by the CGTT_*/..._CGTT_* clock-control registers, then a
  * second GRBM_GFX_INDEX row followed directly by CGTS_SM_CTRL_REG,
  * CP_RB_WPTR_POLL_CNTL, RLC_CGCG_CGLS_CTRL and CP_MEM_SLP_CNTL.
  * This table contains no per-CU CGTS_CUn_* rows.
  * The middle value is 0xffffffff everywhere except the last row.
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 431static const u32 fiji_mgcg_cgcg_init[] =
 432{
 433        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 434        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 435        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 436        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 437        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 438        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 439        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 440        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 441        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 442        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 443        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 444        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 445        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 446        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 447        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 448        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 449        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 450        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 451        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 452        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 453        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 454        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 455        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 456        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 457        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 458        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 459        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 460        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 461        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 462        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 463        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 464        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 465        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 466        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 467        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 468};
 469
 /*
  * Iceland "a11" golden register settings.
  * Flat u32 table, three values per row: an mm* register identifier
  * followed by two 32-bit constants.
  * NOTE(review): the two constants are presumably an AND mask and the value
  * applied under that mask - confirm against the code consuming this table.
  */
 470static const u32 golden_settings_iceland_a11[] =
 471{
 472        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 473        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 474        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 475        mmGB_GPU_ID, 0x0000000f, 0x00000000,
 476        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 477        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 478        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 479        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 480        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 481        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 482        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 483        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 484        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 485        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 486        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 487        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 488};
 489
 /*
  * Iceland common golden settings: GRBM_GFX_INDEX, raster config,
  * GB_ADDR_CONFIG and SPI resource-reserve registers.
  * Three u32 values per row; the middle value is 0xffffffff on every row.
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 490static const u32 iceland_golden_common_all[] =
 491{
 492        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 493        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 494        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 495        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 496        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 497        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 498        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 499        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 500};
 501
 /*
  * Iceland MGCG/CGCG init table.
  * Three u32 values per row: an mm* register identifier and two constants.
  * Layout as written: RLC_CGTT_MGCG_OVERRIDE first, then a GRBM_GFX_INDEX
  * row followed by the CGTT_*/..._CGTT_* clock-control registers, then a
  * second GRBM_GFX_INDEX row followed by five CGTS rows for each of
  * CU0..CU5, and finally CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL and
  * RLC_CGCG_CGLS_CTRL. There is no CP_MEM_SLP_CNTL row in this table.
  * The middle value is 0xffffffff on every row.
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 502static const u32 iceland_mgcg_cgcg_init[] =
 503{
 504        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 505        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 506        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 507        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 508        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 509        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 510        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 511        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 512        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 513        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 514        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 515        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 516        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 517        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 518        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 519        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 520        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 521        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 522        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 523        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 524        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 525        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 526        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 527        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 528        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 529        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 530        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 531        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 532        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 533        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 534        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 535        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 536        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 537        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 538        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 539        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 540        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 541        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 542        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 543        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 544        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 545        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 546        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 547        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 548        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 549        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 550        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 551        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 552        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 553        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 554        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 555        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 556        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 557        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 558        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 559        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 560        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 561        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 562        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 563        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 564        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 565        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 566        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 567        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 568};
 569
 /*
  * "cz" a11 golden register settings (cz presumably = Carrizo - confirm).
  * Flat u32 table, three values per row: an mm* register identifier
  * followed by two 32-bit constants.
  * NOTE(review): the two constants are presumably an AND mask and the value
  * applied under that mask - confirm against the code consuming this table.
  * Under that reading, the TCP_ADDR_CONFIG row's last value (0xf3) has bits
  * set outside its middle value (0xf); verify whether that is intentional.
  */
 570static const u32 cz_golden_settings_a11[] =
 571{
 572        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 573        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 574        mmGB_GPU_ID, 0x0000000f, 0x00000000,
 575        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 576        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 577        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 578        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 579        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 580        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 581        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 582        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 583        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 584};
 585
 /*
  * "cz" common golden settings: GRBM_GFX_INDEX, raster config,
  * GB_ADDR_CONFIG and SPI resource-reserve registers.
  * The GB_ADDR_CONFIG constant equals CARRIZO_GB_ADDR_CONFIG_GOLDEN
  * (0x22010001). Three u32 values per row; the middle value is 0xffffffff
  * on every row.
  * NOTE(review): middle/last values are presumably mask/value - confirm
  * against the consumer of this table.
  */
 586static const u32 cz_golden_common_all[] =
 587{
 588        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 589        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 590        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 591        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 592        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 593        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 594        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 595        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 596};
 597
 598static const u32 cz_mgcg_cgcg_init[] =
 599{
 600        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 601        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 602        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 603        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 604        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 605        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 606        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 607        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 608        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 609        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 610        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 611        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 612        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 613        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 614        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 615        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 616        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 617        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 618        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 619        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 620        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 621        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 622        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 623        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 624        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 625        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 626        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 627        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 628        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 629        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 630        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 631        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 632        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 633        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 634        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 635        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 636        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 637        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 638        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 639        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 640        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 641        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 642        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 643        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 644        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 645        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 646        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 647        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 648        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 649        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 650        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 651        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 652        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 653        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 654        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 655        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 656        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 657        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 658        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 659        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 660        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 661        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 662        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 663        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 664        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 665        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 666        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 667        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 668        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 669        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 670        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 671        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 672        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 673        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 674        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 675};
 676
/*
 * Stoney golden register settings.  gfx_v8_0_init_golden_registers() hands
 * this table to amdgpu_device_program_register_sequence() for CHIP_STONEY,
 * after stoney_mgcg_cgcg_init and before stoney_golden_common_all.
 *
 * NOTE(review): entries appear to be (register, mask, value) triples; confirm
 * the exact mask semantics against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
 690
/*
 * Stoney common golden register values.  This is the last of the three
 * tables gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
 *
 * NOTE(review): entries appear to be (register, mask, value) triples; confirm
 * against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
 702
/*
 * Stoney clock-gating related register values.  This is the first of the
 * three tables gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
 *
 * NOTE(review): entries appear to be (register, mask, value) triples; confirm
 * against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
 711
 712
/*
 * Human-readable descriptions of the possible SQ EDC error sources.
 *
 * NOTE(review): presumably indexed by the raw source value reported by the
 * hardware (the consumer is not visible in this part of the file) - confirm
 * that the index is range-checked at the point of use.
 */
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
 722
/*
 * Forward declarations of file-local helpers, so they can be referenced
 * before their definitions.
 */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 731
/*
 * Mask and shift of the ACLK divider field in the SMC register CG_ACLK_CNTL.
 * Used by gfx_v8_0_init_golden_registers() when it rewrites the divider on
 * Polaris10.
 */
#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
 734
 735static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 736{
 737        uint32_t data;
 738
 739        switch (adev->asic_type) {
 740        case CHIP_TOPAZ:
 741                amdgpu_device_program_register_sequence(adev,
 742                                                        iceland_mgcg_cgcg_init,
 743                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
 744                amdgpu_device_program_register_sequence(adev,
 745                                                        golden_settings_iceland_a11,
 746                                                        ARRAY_SIZE(golden_settings_iceland_a11));
 747                amdgpu_device_program_register_sequence(adev,
 748                                                        iceland_golden_common_all,
 749                                                        ARRAY_SIZE(iceland_golden_common_all));
 750                break;
 751        case CHIP_FIJI:
 752                amdgpu_device_program_register_sequence(adev,
 753                                                        fiji_mgcg_cgcg_init,
 754                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
 755                amdgpu_device_program_register_sequence(adev,
 756                                                        golden_settings_fiji_a10,
 757                                                        ARRAY_SIZE(golden_settings_fiji_a10));
 758                amdgpu_device_program_register_sequence(adev,
 759                                                        fiji_golden_common_all,
 760                                                        ARRAY_SIZE(fiji_golden_common_all));
 761                break;
 762
 763        case CHIP_TONGA:
 764                amdgpu_device_program_register_sequence(adev,
 765                                                        tonga_mgcg_cgcg_init,
 766                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
 767                amdgpu_device_program_register_sequence(adev,
 768                                                        golden_settings_tonga_a11,
 769                                                        ARRAY_SIZE(golden_settings_tonga_a11));
 770                amdgpu_device_program_register_sequence(adev,
 771                                                        tonga_golden_common_all,
 772                                                        ARRAY_SIZE(tonga_golden_common_all));
 773                break;
 774        case CHIP_VEGAM:
 775                amdgpu_device_program_register_sequence(adev,
 776                                                        golden_settings_vegam_a11,
 777                                                        ARRAY_SIZE(golden_settings_vegam_a11));
 778                amdgpu_device_program_register_sequence(adev,
 779                                                        vegam_golden_common_all,
 780                                                        ARRAY_SIZE(vegam_golden_common_all));
 781                break;
 782        case CHIP_POLARIS11:
 783        case CHIP_POLARIS12:
 784                amdgpu_device_program_register_sequence(adev,
 785                                                        golden_settings_polaris11_a11,
 786                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
 787                amdgpu_device_program_register_sequence(adev,
 788                                                        polaris11_golden_common_all,
 789                                                        ARRAY_SIZE(polaris11_golden_common_all));
 790                break;
 791        case CHIP_POLARIS10:
 792                amdgpu_device_program_register_sequence(adev,
 793                                                        golden_settings_polaris10_a11,
 794                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
 795                amdgpu_device_program_register_sequence(adev,
 796                                                        polaris10_golden_common_all,
 797                                                        ARRAY_SIZE(polaris10_golden_common_all));
 798                data = RREG32_SMC(ixCG_ACLK_CNTL);
 799                data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
 800                data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
 801                WREG32_SMC(ixCG_ACLK_CNTL, data);
 802                if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
 803                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 804                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 805                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
 806                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 807                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 808                }
 809                break;
 810        case CHIP_CARRIZO:
 811                amdgpu_device_program_register_sequence(adev,
 812                                                        cz_mgcg_cgcg_init,
 813                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
 814                amdgpu_device_program_register_sequence(adev,
 815                                                        cz_golden_settings_a11,
 816                                                        ARRAY_SIZE(cz_golden_settings_a11));
 817                amdgpu_device_program_register_sequence(adev,
 818                                                        cz_golden_common_all,
 819                                                        ARRAY_SIZE(cz_golden_common_all));
 820                break;
 821        case CHIP_STONEY:
 822                amdgpu_device_program_register_sequence(adev,
 823                                                        stoney_mgcg_cgcg_init,
 824                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
 825                amdgpu_device_program_register_sequence(adev,
 826                                                        stoney_golden_settings_a11,
 827                                                        ARRAY_SIZE(stoney_golden_settings_a11));
 828                amdgpu_device_program_register_sequence(adev,
 829                                                        stoney_golden_common_all,
 830                                                        ARRAY_SIZE(stoney_golden_common_all));
 831                break;
 832        default:
 833                break;
 834        }
 835}
 836
 837static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 838{
 839        adev->gfx.scratch.num_reg = 8;
 840        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 841        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 842}
 843
 844static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 845{
 846        struct amdgpu_device *adev = ring->adev;
 847        uint32_t scratch;
 848        uint32_t tmp = 0;
 849        unsigned i;
 850        int r;
 851
 852        r = amdgpu_gfx_scratch_get(adev, &scratch);
 853        if (r)
 854                return r;
 855
 856        WREG32(scratch, 0xCAFEDEAD);
 857        r = amdgpu_ring_alloc(ring, 3);
 858        if (r)
 859                goto error_free_scratch;
 860
 861        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 862        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 863        amdgpu_ring_write(ring, 0xDEADBEEF);
 864        amdgpu_ring_commit(ring);
 865
 866        for (i = 0; i < adev->usec_timeout; i++) {
 867                tmp = RREG32(scratch);
 868                if (tmp == 0xDEADBEEF)
 869                        break;
 870                udelay(1);
 871        }
 872
 873        if (i >= adev->usec_timeout)
 874                r = -ETIMEDOUT;
 875
 876error_free_scratch:
 877        amdgpu_gfx_scratch_free(adev, scratch);
 878        return r;
 879}
 880
 881static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 882{
 883        struct amdgpu_device *adev = ring->adev;
 884        struct amdgpu_ib ib;
 885        struct dma_fence *f = NULL;
 886
 887        unsigned int index;
 888        uint64_t gpu_addr;
 889        uint32_t tmp;
 890        long r;
 891
 892        r = amdgpu_device_wb_get(adev, &index);
 893        if (r)
 894                return r;
 895
 896        gpu_addr = adev->wb.gpu_addr + (index * 4);
 897        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 898        memset(&ib, 0, sizeof(ib));
 899        r = amdgpu_ib_get(adev, NULL, 16,
 900                                        AMDGPU_IB_POOL_DIRECT, &ib);
 901        if (r)
 902                goto err1;
 903
 904        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 905        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 906        ib.ptr[2] = lower_32_bits(gpu_addr);
 907        ib.ptr[3] = upper_32_bits(gpu_addr);
 908        ib.ptr[4] = 0xDEADBEEF;
 909        ib.length_dw = 5;
 910
 911        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 912        if (r)
 913                goto err2;
 914
 915        r = dma_fence_wait_timeout(f, false, timeout);
 916        if (r == 0) {
 917                r = -ETIMEDOUT;
 918                goto err2;
 919        } else if (r < 0) {
 920                goto err2;
 921        }
 922
 923        tmp = adev->wb.wb[index];
 924        if (tmp == 0xDEADBEEF)
 925                r = 0;
 926        else
 927                r = -EINVAL;
 928
 929err2:
 930        amdgpu_ib_free(adev, &ib, NULL);
 931        dma_fence_put(f);
 932err1:
 933        amdgpu_device_wb_free(adev, index);
 934        return r;
 935}
 936
 937
 938static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 939{
 940        release_firmware(adev->gfx.pfp_fw);
 941        adev->gfx.pfp_fw = NULL;
 942        release_firmware(adev->gfx.me_fw);
 943        adev->gfx.me_fw = NULL;
 944        release_firmware(adev->gfx.ce_fw);
 945        adev->gfx.ce_fw = NULL;
 946        release_firmware(adev->gfx.rlc_fw);
 947        adev->gfx.rlc_fw = NULL;
 948        release_firmware(adev->gfx.mec_fw);
 949        adev->gfx.mec_fw = NULL;
 950        if ((adev->asic_type != CHIP_STONEY) &&
 951            (adev->asic_type != CHIP_TOPAZ))
 952                release_firmware(adev->gfx.mec2_fw);
 953        adev->gfx.mec2_fw = NULL;
 954
 955        kfree(adev->gfx.rlc.register_list_format);
 956}
 957
 958static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 959{
 960        const char *chip_name;
 961        char fw_name[30];
 962        int err;
 963        struct amdgpu_firmware_info *info = NULL;
 964        const struct common_firmware_header *header = NULL;
 965        const struct gfx_firmware_header_v1_0 *cp_hdr;
 966        const struct rlc_firmware_header_v2_0 *rlc_hdr;
 967        unsigned int *tmp = NULL, i;
 968
 969        DRM_DEBUG("\n");
 970
 971        switch (adev->asic_type) {
 972        case CHIP_TOPAZ:
 973                chip_name = "topaz";
 974                break;
 975        case CHIP_TONGA:
 976                chip_name = "tonga";
 977                break;
 978        case CHIP_CARRIZO:
 979                chip_name = "carrizo";
 980                break;
 981        case CHIP_FIJI:
 982                chip_name = "fiji";
 983                break;
 984        case CHIP_STONEY:
 985                chip_name = "stoney";
 986                break;
 987        case CHIP_POLARIS10:
 988                chip_name = "polaris10";
 989                break;
 990        case CHIP_POLARIS11:
 991                chip_name = "polaris11";
 992                break;
 993        case CHIP_POLARIS12:
 994                chip_name = "polaris12";
 995                break;
 996        case CHIP_VEGAM:
 997                chip_name = "vegam";
 998                break;
 999        default:
1000                BUG();
1001        }
1002
1003        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1004                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1005                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1006                if (err == -ENOENT) {
1007                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1008                        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1009                }
1010        } else {
1011                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1012                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1013        }
1014        if (err)
1015                goto out;
1016        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1017        if (err)
1018                goto out;
1019        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1020        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1021        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1022
1023        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1024                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1025                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1026                if (err == -ENOENT) {
1027                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1028                        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1029                }
1030        } else {
1031                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1032                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1033        }
1034        if (err)
1035                goto out;
1036        err = amdgpu_ucode_validate(adev->gfx.me_fw);
1037        if (err)
1038                goto out;
1039        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1040        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1041
1042        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1043
1044        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1045                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1046                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1047                if (err == -ENOENT) {
1048                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1049                        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1050                }
1051        } else {
1052                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1053                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1054        }
1055        if (err)
1056                goto out;
1057        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1058        if (err)
1059                goto out;
1060        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1061        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1062        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1063
1064        /*
1065         * Support for MCBP/Virtualization in combination with chained IBs is
1066         * formal released on feature version #46
1067         */
1068        if (adev->gfx.ce_feature_version >= 46 &&
1069            adev->gfx.pfp_feature_version >= 46) {
1070                adev->virt.chained_ib_support = true;
1071                DRM_INFO("Chained IB support enabled!\n");
1072        } else
1073                adev->virt.chained_ib_support = false;
1074
1075        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1076        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1077        if (err)
1078                goto out;
1079        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1080        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1081        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1082        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1083
1084        adev->gfx.rlc.save_and_restore_offset =
1085                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
1086        adev->gfx.rlc.clear_state_descriptor_offset =
1087                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1088        adev->gfx.rlc.avail_scratch_ram_locations =
1089                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1090        adev->gfx.rlc.reg_restore_list_size =
1091                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
1092        adev->gfx.rlc.reg_list_format_start =
1093                        le32_to_cpu(rlc_hdr->reg_list_format_start);
1094        adev->gfx.rlc.reg_list_format_separate_start =
1095                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1096        adev->gfx.rlc.starting_offsets_start =
1097                        le32_to_cpu(rlc_hdr->starting_offsets_start);
1098        adev->gfx.rlc.reg_list_format_size_bytes =
1099                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1100        adev->gfx.rlc.reg_list_size_bytes =
1101                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1102
1103        adev->gfx.rlc.register_list_format =
1104                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1105                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1106
1107        if (!adev->gfx.rlc.register_list_format) {
1108                err = -ENOMEM;
1109                goto out;
1110        }
1111
1112        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1113                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1114        for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1115                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1116
1117        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1118
1119        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1120                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1121        for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1122                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1123
1124        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1125                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1126                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1127                if (err == -ENOENT) {
1128                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1129                        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1130                }
1131        } else {
1132                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1133                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1134        }
1135        if (err)
1136                goto out;
1137        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1138        if (err)
1139                goto out;
1140        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1141        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1142        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1143
1144        if ((adev->asic_type != CHIP_STONEY) &&
1145            (adev->asic_type != CHIP_TOPAZ)) {
1146                if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1147                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1148                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1149                        if (err == -ENOENT) {
1150                                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1151                                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1152                        }
1153                } else {
1154                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1155                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1156                }
1157                if (!err) {
1158                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1159                        if (err)
1160                                goto out;
1161                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1162                                adev->gfx.mec2_fw->data;
1163                        adev->gfx.mec2_fw_version =
1164                                le32_to_cpu(cp_hdr->header.ucode_version);
1165                        adev->gfx.mec2_feature_version =
1166                                le32_to_cpu(cp_hdr->ucode_feature_version);
1167                } else {
1168                        err = 0;
1169                        adev->gfx.mec2_fw = NULL;
1170                }
1171        }
1172
1173        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1174        info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1175        info->fw = adev->gfx.pfp_fw;
1176        header = (const struct common_firmware_header *)info->fw->data;
1177        adev->firmware.fw_size +=
1178                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1179
1180        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1181        info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1182        info->fw = adev->gfx.me_fw;
1183        header = (const struct common_firmware_header *)info->fw->data;
1184        adev->firmware.fw_size +=
1185                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1186
1187        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1188        info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1189        info->fw = adev->gfx.ce_fw;
1190        header = (const struct common_firmware_header *)info->fw->data;
1191        adev->firmware.fw_size +=
1192                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1193
1194        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1195        info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1196        info->fw = adev->gfx.rlc_fw;
1197        header = (const struct common_firmware_header *)info->fw->data;
1198        adev->firmware.fw_size +=
1199                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1200
1201        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1202        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1203        info->fw = adev->gfx.mec_fw;
1204        header = (const struct common_firmware_header *)info->fw->data;
1205        adev->firmware.fw_size +=
1206                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1207
1208        /* we need account JT in */
1209        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1210        adev->firmware.fw_size +=
1211                ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1212
1213        if (amdgpu_sriov_vf(adev)) {
1214                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1215                info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1216                info->fw = adev->gfx.mec_fw;
1217                adev->firmware.fw_size +=
1218                        ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1219        }
1220
1221        if (adev->gfx.mec2_fw) {
1222                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1223                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1224                info->fw = adev->gfx.mec2_fw;
1225                header = (const struct common_firmware_header *)info->fw->data;
1226                adev->firmware.fw_size +=
1227                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1228        }
1229
1230out:
1231        if (err) {
1232                dev_err(adev->dev,
1233                        "gfx8: Failed to load firmware \"%s\"\n",
1234                        fw_name);
1235                release_firmware(adev->gfx.pfp_fw);
1236                adev->gfx.pfp_fw = NULL;
1237                release_firmware(adev->gfx.me_fw);
1238                adev->gfx.me_fw = NULL;
1239                release_firmware(adev->gfx.ce_fw);
1240                adev->gfx.ce_fw = NULL;
1241                release_firmware(adev->gfx.rlc_fw);
1242                adev->gfx.rlc_fw = NULL;
1243                release_firmware(adev->gfx.mec_fw);
1244                adev->gfx.mec_fw = NULL;
1245                release_firmware(adev->gfx.mec2_fw);
1246                adev->gfx.mec2_fw = NULL;
1247        }
1248        return err;
1249}
1250
1251static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1252                                    volatile u32 *buffer)
1253{
1254        u32 count = 0, i;
1255        const struct cs_section_def *sect = NULL;
1256        const struct cs_extent_def *ext = NULL;
1257
1258        if (adev->gfx.rlc.cs_data == NULL)
1259                return;
1260        if (buffer == NULL)
1261                return;
1262
1263        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1264        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1265
1266        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1267        buffer[count++] = cpu_to_le32(0x80000000);
1268        buffer[count++] = cpu_to_le32(0x80000000);
1269
1270        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1271                for (ext = sect->section; ext->extent != NULL; ++ext) {
1272                        if (sect->id == SECT_CONTEXT) {
1273                                buffer[count++] =
1274                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1275                                buffer[count++] = cpu_to_le32(ext->reg_index -
1276                                                PACKET3_SET_CONTEXT_REG_START);
1277                                for (i = 0; i < ext->reg_count; i++)
1278                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
1279                        } else {
1280                                return;
1281                        }
1282                }
1283        }
1284
1285        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1286        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1287                        PACKET3_SET_CONTEXT_REG_START);
1288        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1289        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1290
1291        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1292        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1293
1294        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1295        buffer[count++] = cpu_to_le32(0);
1296}
1297
1298static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1299{
1300        if (adev->asic_type == CHIP_CARRIZO)
1301                return 5;
1302        else
1303                return 4;
1304}
1305
1306static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1307{
1308        const struct cs_section_def *cs_data;
1309        int r;
1310
1311        adev->gfx.rlc.cs_data = vi_cs_data;
1312
1313        cs_data = adev->gfx.rlc.cs_data;
1314
1315        if (cs_data) {
1316                /* init clear state block */
1317                r = amdgpu_gfx_rlc_init_csb(adev);
1318                if (r)
1319                        return r;
1320        }
1321
1322        if ((adev->asic_type == CHIP_CARRIZO) ||
1323            (adev->asic_type == CHIP_STONEY)) {
1324                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1325                r = amdgpu_gfx_rlc_init_cpt(adev);
1326                if (r)
1327                        return r;
1328        }
1329
1330        /* init spm vmid with 0xf */
1331        if (adev->gfx.rlc.funcs->update_spm_vmid)
1332                adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1333
1334        return 0;
1335}
1336
1337static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1338{
1339        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1340}
1341
1342static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1343{
1344        int r;
1345        u32 *hpd;
1346        size_t mec_hpd_size;
1347
1348        bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1349
1350        /* take ownership of the relevant compute queues */
1351        amdgpu_gfx_compute_queue_acquire(adev);
1352
1353        mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1354        if (mec_hpd_size) {
1355                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1356                                              AMDGPU_GEM_DOMAIN_VRAM,
1357                                              &adev->gfx.mec.hpd_eop_obj,
1358                                              &adev->gfx.mec.hpd_eop_gpu_addr,
1359                                              (void **)&hpd);
1360                if (r) {
1361                        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1362                        return r;
1363                }
1364
1365                memset(hpd, 0, mec_hpd_size);
1366
1367                amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1368                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1369        }
1370
1371        return 0;
1372}
1373
1374static const u32 vgpr_init_compute_shader[] =
1375{
1376        0x7e000209, 0x7e020208,
1377        0x7e040207, 0x7e060206,
1378        0x7e080205, 0x7e0a0204,
1379        0x7e0c0203, 0x7e0e0202,
1380        0x7e100201, 0x7e120200,
1381        0x7e140209, 0x7e160208,
1382        0x7e180207, 0x7e1a0206,
1383        0x7e1c0205, 0x7e1e0204,
1384        0x7e200203, 0x7e220202,
1385        0x7e240201, 0x7e260200,
1386        0x7e280209, 0x7e2a0208,
1387        0x7e2c0207, 0x7e2e0206,
1388        0x7e300205, 0x7e320204,
1389        0x7e340203, 0x7e360202,
1390        0x7e380201, 0x7e3a0200,
1391        0x7e3c0209, 0x7e3e0208,
1392        0x7e400207, 0x7e420206,
1393        0x7e440205, 0x7e460204,
1394        0x7e480203, 0x7e4a0202,
1395        0x7e4c0201, 0x7e4e0200,
1396        0x7e500209, 0x7e520208,
1397        0x7e540207, 0x7e560206,
1398        0x7e580205, 0x7e5a0204,
1399        0x7e5c0203, 0x7e5e0202,
1400        0x7e600201, 0x7e620200,
1401        0x7e640209, 0x7e660208,
1402        0x7e680207, 0x7e6a0206,
1403        0x7e6c0205, 0x7e6e0204,
1404        0x7e700203, 0x7e720202,
1405        0x7e740201, 0x7e760200,
1406        0x7e780209, 0x7e7a0208,
1407        0x7e7c0207, 0x7e7e0206,
1408        0xbf8a0000, 0xbf810000,
1409};
1410
1411static const u32 sgpr_init_compute_shader[] =
1412{
1413        0xbe8a0100, 0xbe8c0102,
1414        0xbe8e0104, 0xbe900106,
1415        0xbe920108, 0xbe940100,
1416        0xbe960102, 0xbe980104,
1417        0xbe9a0106, 0xbe9c0108,
1418        0xbe9e0100, 0xbea00102,
1419        0xbea20104, 0xbea40106,
1420        0xbea60108, 0xbea80100,
1421        0xbeaa0102, 0xbeac0104,
1422        0xbeae0106, 0xbeb00108,
1423        0xbeb20100, 0xbeb40102,
1424        0xbeb60104, 0xbeb80106,
1425        0xbeba0108, 0xbebc0100,
1426        0xbebe0102, 0xbec00104,
1427        0xbec20106, 0xbec40108,
1428        0xbec60100, 0xbec80102,
1429        0xbee60004, 0xbee70005,
1430        0xbeea0006, 0xbeeb0007,
1431        0xbee80008, 0xbee90009,
1432        0xbefc0000, 0xbf8a0000,
1433        0xbf810000, 0x00000000,
1434};
1435
1436static const u32 vgpr_init_regs[] =
1437{
1438        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1439        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1440        mmCOMPUTE_NUM_THREAD_X, 256*4,
1441        mmCOMPUTE_NUM_THREAD_Y, 1,
1442        mmCOMPUTE_NUM_THREAD_Z, 1,
1443        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1444        mmCOMPUTE_PGM_RSRC2, 20,
1445        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1446        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1447        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1448        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1449        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1450        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1451        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1452        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1453        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1454        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1455};
1456
1457static const u32 sgpr1_init_regs[] =
1458{
1459        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1460        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1461        mmCOMPUTE_NUM_THREAD_X, 256*5,
1462        mmCOMPUTE_NUM_THREAD_Y, 1,
1463        mmCOMPUTE_NUM_THREAD_Z, 1,
1464        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1465        mmCOMPUTE_PGM_RSRC2, 20,
1466        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1467        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1468        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1469        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1470        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1471        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1472        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1473        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1474        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1475        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1476};
1477
1478static const u32 sgpr2_init_regs[] =
1479{
1480        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1481        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1482        mmCOMPUTE_NUM_THREAD_X, 256*5,
1483        mmCOMPUTE_NUM_THREAD_Y, 1,
1484        mmCOMPUTE_NUM_THREAD_Z, 1,
1485        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1486        mmCOMPUTE_PGM_RSRC2, 20,
1487        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1488        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1489        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1490        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1491        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1492        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1493        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1494        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1495        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1496        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1497};
1498
1499static const u32 sec_ded_counter_registers[] =
1500{
1501        mmCPC_EDC_ATC_CNT,
1502        mmCPC_EDC_SCRATCH_CNT,
1503        mmCPC_EDC_UCODE_CNT,
1504        mmCPF_EDC_ATC_CNT,
1505        mmCPF_EDC_ROQ_CNT,
1506        mmCPF_EDC_TAG_CNT,
1507        mmCPG_EDC_ATC_CNT,
1508        mmCPG_EDC_DMA_CNT,
1509        mmCPG_EDC_TAG_CNT,
1510        mmDC_EDC_CSINVOC_CNT,
1511        mmDC_EDC_RESTORE_CNT,
1512        mmDC_EDC_STATE_CNT,
1513        mmGDS_EDC_CNT,
1514        mmGDS_EDC_GRBM_CNT,
1515        mmGDS_EDC_OA_DED,
1516        mmSPI_EDC_CNT,
1517        mmSQC_ATC_EDC_GATCL1_CNT,
1518        mmSQC_EDC_CNT,
1519        mmSQ_EDC_DED_CNT,
1520        mmSQ_EDC_INFO,
1521        mmSQ_EDC_SEC_CNT,
1522        mmTCC_EDC_CNT,
1523        mmTCP_ATC_EDC_GATCL1_CNT,
1524        mmTCP_EDC_CNT,
1525        mmTD_EDC_CNT
1526};
1527
1528static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1529{
1530        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1531        struct amdgpu_ib ib;
1532        struct dma_fence *f = NULL;
1533        int r, i;
1534        u32 tmp;
1535        unsigned total_size, vgpr_offset, sgpr_offset;
1536        u64 gpu_addr;
1537
1538        /* only supported on CZ */
1539        if (adev->asic_type != CHIP_CARRIZO)
1540                return 0;
1541
1542        /* bail if the compute ring is not ready */
1543        if (!ring->sched.ready)
1544                return 0;
1545
1546        tmp = RREG32(mmGB_EDC_MODE);
1547        WREG32(mmGB_EDC_MODE, 0);
1548
1549        total_size =
1550                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1551        total_size +=
1552                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1553        total_size +=
1554                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1555        total_size = ALIGN(total_size, 256);
1556        vgpr_offset = total_size;
1557        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1558        sgpr_offset = total_size;
1559        total_size += sizeof(sgpr_init_compute_shader);
1560
1561        /* allocate an indirect buffer to put the commands in */
1562        memset(&ib, 0, sizeof(ib));
1563        r = amdgpu_ib_get(adev, NULL, total_size,
1564                                        AMDGPU_IB_POOL_DIRECT, &ib);
1565        if (r) {
1566                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1567                return r;
1568        }
1569
1570        /* load the compute shaders */
1571        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1572                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1573
1574        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1575                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1576
1577        /* init the ib length to 0 */
1578        ib.length_dw = 0;
1579
1580        /* VGPR */
1581        /* write the register state for the compute dispatch */
1582        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1583                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1584                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1585                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1586        }
1587        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1588        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1589        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1590        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1591        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1592        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1593
1594        /* write dispatch packet */
1595        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1596        ib.ptr[ib.length_dw++] = 8; /* x */
1597        ib.ptr[ib.length_dw++] = 1; /* y */
1598        ib.ptr[ib.length_dw++] = 1; /* z */
1599        ib.ptr[ib.length_dw++] =
1600                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1601
1602        /* write CS partial flush packet */
1603        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1604        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1605
1606        /* SGPR1 */
1607        /* write the register state for the compute dispatch */
1608        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1609                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1610                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1611                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1612        }
1613        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1614        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1615        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1616        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1617        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1618        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1619
1620        /* write dispatch packet */
1621        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1622        ib.ptr[ib.length_dw++] = 8; /* x */
1623        ib.ptr[ib.length_dw++] = 1; /* y */
1624        ib.ptr[ib.length_dw++] = 1; /* z */
1625        ib.ptr[ib.length_dw++] =
1626                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1627
1628        /* write CS partial flush packet */
1629        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1630        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1631
1632        /* SGPR2 */
1633        /* write the register state for the compute dispatch */
1634        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1635                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1636                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1637                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1638        }
1639        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1640        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1641        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1642        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1643        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1644        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1645
1646        /* write dispatch packet */
1647        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1648        ib.ptr[ib.length_dw++] = 8; /* x */
1649        ib.ptr[ib.length_dw++] = 1; /* y */
1650        ib.ptr[ib.length_dw++] = 1; /* z */
1651        ib.ptr[ib.length_dw++] =
1652                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1653
1654        /* write CS partial flush packet */
1655        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1656        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1657
1658        /* shedule the ib on the ring */
1659        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1660        if (r) {
1661                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1662                goto fail;
1663        }
1664
1665        /* wait for the GPU to finish processing the IB */
1666        r = dma_fence_wait(f, false);
1667        if (r) {
1668                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1669                goto fail;
1670        }
1671
1672        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1673        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1674        WREG32(mmGB_EDC_MODE, tmp);
1675
1676        tmp = RREG32(mmCC_GC_EDC_CONFIG);
1677        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1678        WREG32(mmCC_GC_EDC_CONFIG, tmp);
1679
1680
1681        /* read back registers to clear the counters */
1682        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1683                RREG32(sec_ded_counter_registers[i]);
1684
1685fail:
1686        amdgpu_ib_free(adev, &ib, NULL);
1687        dma_fence_put(f);
1688
1689        return r;
1690}
1691
1692static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1693{
1694        u32 gb_addr_config;
1695        u32 mc_arb_ramcfg;
1696        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1697        u32 tmp;
1698        int ret;
1699
1700        switch (adev->asic_type) {
1701        case CHIP_TOPAZ:
1702                adev->gfx.config.max_shader_engines = 1;
1703                adev->gfx.config.max_tile_pipes = 2;
1704                adev->gfx.config.max_cu_per_sh = 6;
1705                adev->gfx.config.max_sh_per_se = 1;
1706                adev->gfx.config.max_backends_per_se = 2;
1707                adev->gfx.config.max_texture_channel_caches = 2;
1708                adev->gfx.config.max_gprs = 256;
1709                adev->gfx.config.max_gs_threads = 32;
1710                adev->gfx.config.max_hw_contexts = 8;
1711
1712                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1713                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1714                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1715                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1716                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1717                break;
1718        case CHIP_FIJI:
1719                adev->gfx.config.max_shader_engines = 4;
1720                adev->gfx.config.max_tile_pipes = 16;
1721                adev->gfx.config.max_cu_per_sh = 16;
1722                adev->gfx.config.max_sh_per_se = 1;
1723                adev->gfx.config.max_backends_per_se = 4;
1724                adev->gfx.config.max_texture_channel_caches = 16;
1725                adev->gfx.config.max_gprs = 256;
1726                adev->gfx.config.max_gs_threads = 32;
1727                adev->gfx.config.max_hw_contexts = 8;
1728
1729                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1730                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1731                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1732                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1733                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1734                break;
1735        case CHIP_POLARIS11:
1736        case CHIP_POLARIS12:
1737                ret = amdgpu_atombios_get_gfx_info(adev);
1738                if (ret)
1739                        return ret;
1740                adev->gfx.config.max_gprs = 256;
1741                adev->gfx.config.max_gs_threads = 32;
1742                adev->gfx.config.max_hw_contexts = 8;
1743
1744                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1745                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1746                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1747                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1748                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1749                break;
1750        case CHIP_POLARIS10:
1751        case CHIP_VEGAM:
1752                ret = amdgpu_atombios_get_gfx_info(adev);
1753                if (ret)
1754                        return ret;
1755                adev->gfx.config.max_gprs = 256;
1756                adev->gfx.config.max_gs_threads = 32;
1757                adev->gfx.config.max_hw_contexts = 8;
1758
1759                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1760                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1761                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1762                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1763                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1764                break;
1765        case CHIP_TONGA:
1766                adev->gfx.config.max_shader_engines = 4;
1767                adev->gfx.config.max_tile_pipes = 8;
1768                adev->gfx.config.max_cu_per_sh = 8;
1769                adev->gfx.config.max_sh_per_se = 1;
1770                adev->gfx.config.max_backends_per_se = 2;
1771                adev->gfx.config.max_texture_channel_caches = 8;
1772                adev->gfx.config.max_gprs = 256;
1773                adev->gfx.config.max_gs_threads = 32;
1774                adev->gfx.config.max_hw_contexts = 8;
1775
1776                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1777                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1778                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1779                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1780                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1781                break;
1782        case CHIP_CARRIZO:
1783                adev->gfx.config.max_shader_engines = 1;
1784                adev->gfx.config.max_tile_pipes = 2;
1785                adev->gfx.config.max_sh_per_se = 1;
1786                adev->gfx.config.max_backends_per_se = 2;
1787                adev->gfx.config.max_cu_per_sh = 8;
1788                adev->gfx.config.max_texture_channel_caches = 2;
1789                adev->gfx.config.max_gprs = 256;
1790                adev->gfx.config.max_gs_threads = 32;
1791                adev->gfx.config.max_hw_contexts = 8;
1792
1793                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1794                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1795                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1796                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1797                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1798                break;
1799        case CHIP_STONEY:
1800                adev->gfx.config.max_shader_engines = 1;
1801                adev->gfx.config.max_tile_pipes = 2;
1802                adev->gfx.config.max_sh_per_se = 1;
1803                adev->gfx.config.max_backends_per_se = 1;
1804                adev->gfx.config.max_cu_per_sh = 3;
1805                adev->gfx.config.max_texture_channel_caches = 2;
1806                adev->gfx.config.max_gprs = 256;
1807                adev->gfx.config.max_gs_threads = 16;
1808                adev->gfx.config.max_hw_contexts = 8;
1809
1810                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1811                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1812                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1813                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1814                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1815                break;
1816        default:
1817                adev->gfx.config.max_shader_engines = 2;
1818                adev->gfx.config.max_tile_pipes = 4;
1819                adev->gfx.config.max_cu_per_sh = 2;
1820                adev->gfx.config.max_sh_per_se = 1;
1821                adev->gfx.config.max_backends_per_se = 2;
1822                adev->gfx.config.max_texture_channel_caches = 4;
1823                adev->gfx.config.max_gprs = 256;
1824                adev->gfx.config.max_gs_threads = 32;
1825                adev->gfx.config.max_hw_contexts = 8;
1826
1827                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1828                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1829                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1830                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1831                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1832                break;
1833        }
1834
1835        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1836        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1837
1838        adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1839                                MC_ARB_RAMCFG, NOOFBANK);
1840        adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1841                                MC_ARB_RAMCFG, NOOFRANKS);
1842
1843        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1844        adev->gfx.config.mem_max_burst_length_bytes = 256;
1845        if (adev->flags & AMD_IS_APU) {
1846                /* Get memory bank mapping mode. */
1847                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1848                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1849                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1850
1851                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1852                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1853                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1854
1855                /* Validate settings in case only one DIMM installed. */
1856                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1857                        dimm00_addr_map = 0;
1858                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1859                        dimm01_addr_map = 0;
1860                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1861                        dimm10_addr_map = 0;
1862                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1863                        dimm11_addr_map = 0;
1864
1865                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1866                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1867                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1868                        adev->gfx.config.mem_row_size_in_kb = 2;
1869                else
1870                        adev->gfx.config.mem_row_size_in_kb = 1;
1871        } else {
1872                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1873                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1874                if (adev->gfx.config.mem_row_size_in_kb > 4)
1875                        adev->gfx.config.mem_row_size_in_kb = 4;
1876        }
1877
1878        adev->gfx.config.shader_engine_tile_size = 32;
1879        adev->gfx.config.num_gpus = 1;
1880        adev->gfx.config.multi_gpu_tile_size = 64;
1881
1882        /* fix up row size */
1883        switch (adev->gfx.config.mem_row_size_in_kb) {
1884        case 1:
1885        default:
1886                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1887                break;
1888        case 2:
1889                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1890                break;
1891        case 4:
1892                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1893                break;
1894        }
1895        adev->gfx.config.gb_addr_config = gb_addr_config;
1896
1897        return 0;
1898}
1899
1900static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1901                                        int mec, int pipe, int queue)
1902{
1903        int r;
1904        unsigned irq_type;
1905        struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1906        unsigned int hw_prio;
1907
1908        ring = &adev->gfx.compute_ring[ring_id];
1909
1910        /* mec0 is me1 */
1911        ring->me = mec + 1;
1912        ring->pipe = pipe;
1913        ring->queue = queue;
1914
1915        ring->ring_obj = NULL;
1916        ring->use_doorbell = true;
1917        ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1918        ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1919                                + (ring_id * GFX8_MEC_HPD_SIZE);
1920        sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1921
1922        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1923                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1924                + ring->pipe;
1925
1926        hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe,
1927                                                            ring->queue) ?
1928                        AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1929        /* type-2 packets are deprecated on MEC, use type-3 instead */
1930        r = amdgpu_ring_init(adev, ring, 1024,
1931                             &adev->gfx.eop_irq, irq_type, hw_prio);
1932        if (r)
1933                return r;
1934
1935
1936        return 0;
1937}
1938
1939static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1940
1941static int gfx_v8_0_sw_init(void *handle)
1942{
1943        int i, j, k, r, ring_id;
1944        struct amdgpu_ring *ring;
1945        struct amdgpu_kiq *kiq;
1946        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1947
1948        switch (adev->asic_type) {
1949        case CHIP_TONGA:
1950        case CHIP_CARRIZO:
1951        case CHIP_FIJI:
1952        case CHIP_POLARIS10:
1953        case CHIP_POLARIS11:
1954        case CHIP_POLARIS12:
1955        case CHIP_VEGAM:
1956                adev->gfx.mec.num_mec = 2;
1957                break;
1958        case CHIP_TOPAZ:
1959        case CHIP_STONEY:
1960        default:
1961                adev->gfx.mec.num_mec = 1;
1962                break;
1963        }
1964
1965        adev->gfx.mec.num_pipe_per_mec = 4;
1966        adev->gfx.mec.num_queue_per_pipe = 8;
1967
1968        /* EOP Event */
1969        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1970        if (r)
1971                return r;
1972
1973        /* Privileged reg */
1974        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1975                              &adev->gfx.priv_reg_irq);
1976        if (r)
1977                return r;
1978
1979        /* Privileged inst */
1980        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1981                              &adev->gfx.priv_inst_irq);
1982        if (r)
1983                return r;
1984
1985        /* Add CP EDC/ECC irq  */
1986        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1987                              &adev->gfx.cp_ecc_error_irq);
1988        if (r)
1989                return r;
1990
1991        /* SQ interrupts. */
1992        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1993                              &adev->gfx.sq_irq);
1994        if (r) {
1995                DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1996                return r;
1997        }
1998
1999        INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2000
2001        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2002
2003        gfx_v8_0_scratch_init(adev);
2004
2005        r = gfx_v8_0_init_microcode(adev);
2006        if (r) {
2007                DRM_ERROR("Failed to load gfx firmware!\n");
2008                return r;
2009        }
2010
2011        r = adev->gfx.rlc.funcs->init(adev);
2012        if (r) {
2013                DRM_ERROR("Failed to init rlc BOs!\n");
2014                return r;
2015        }
2016
2017        r = gfx_v8_0_mec_init(adev);
2018        if (r) {
2019                DRM_ERROR("Failed to init MEC BOs!\n");
2020                return r;
2021        }
2022
2023        /* set up the gfx ring */
2024        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2025                ring = &adev->gfx.gfx_ring[i];
2026                ring->ring_obj = NULL;
2027                sprintf(ring->name, "gfx");
2028                /* no gfx doorbells on iceland */
2029                if (adev->asic_type != CHIP_TOPAZ) {
2030                        ring->use_doorbell = true;
2031                        ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2032                }
2033
2034                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2035                                     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2036                                     AMDGPU_RING_PRIO_DEFAULT);
2037                if (r)
2038                        return r;
2039        }
2040
2041
2042        /* set up the compute queues - allocate horizontally across pipes */
2043        ring_id = 0;
2044        for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2045                for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2046                        for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2047                                if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2048                                        continue;
2049
2050                                r = gfx_v8_0_compute_ring_init(adev,
2051                                                                ring_id,
2052                                                                i, k, j);
2053                                if (r)
2054                                        return r;
2055
2056                                ring_id++;
2057                        }
2058                }
2059        }
2060
2061        r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2062        if (r) {
2063                DRM_ERROR("Failed to init KIQ BOs!\n");
2064                return r;
2065        }
2066
2067        kiq = &adev->gfx.kiq;
2068        r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2069        if (r)
2070                return r;
2071
2072        /* create MQD for all compute queues as well as KIQ for SRIOV case */
2073        r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2074        if (r)
2075                return r;
2076
2077        adev->gfx.ce_ram_size = 0x8000;
2078
2079        r = gfx_v8_0_gpu_early_init(adev);
2080        if (r)
2081                return r;
2082
2083        return 0;
2084}
2085
2086static int gfx_v8_0_sw_fini(void *handle)
2087{
2088        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2089        int i;
2090
2091        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2092                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2093        for (i = 0; i < adev->gfx.num_compute_rings; i++)
2094                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2095
2096        amdgpu_gfx_mqd_sw_fini(adev);
2097        amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2098        amdgpu_gfx_kiq_fini(adev);
2099
2100        gfx_v8_0_mec_fini(adev);
2101        amdgpu_gfx_rlc_fini(adev);
2102        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2103                                &adev->gfx.rlc.clear_state_gpu_addr,
2104                                (void **)&adev->gfx.rlc.cs_ptr);
2105        if ((adev->asic_type == CHIP_CARRIZO) ||
2106            (adev->asic_type == CHIP_STONEY)) {
2107                amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2108                                &adev->gfx.rlc.cp_table_gpu_addr,
2109                                (void **)&adev->gfx.rlc.cp_table_ptr);
2110        }
2111        gfx_v8_0_free_microcode(adev);
2112
2113        return 0;
2114}
2115
2116static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2117{
2118        uint32_t *modearray, *mod2array;
2119        const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2120        const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2121        u32 reg_offset;
2122
2123        modearray = adev->gfx.config.tile_mode_array;
2124        mod2array = adev->gfx.config.macrotile_mode_array;
2125
2126        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2127                modearray[reg_offset] = 0;
2128
2129        for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2130                mod2array[reg_offset] = 0;
2131
2132        switch (adev->asic_type) {
2133        case CHIP_TOPAZ:
2134                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                PIPE_CONFIG(ADDR_SURF_P2) |
2136                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2137                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139                                PIPE_CONFIG(ADDR_SURF_P2) |
2140                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2141                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2143                                PIPE_CONFIG(ADDR_SURF_P2) |
2144                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2145                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2147                                PIPE_CONFIG(ADDR_SURF_P2) |
2148                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2149                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2150                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                PIPE_CONFIG(ADDR_SURF_P2) |
2152                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2153                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2155                                PIPE_CONFIG(ADDR_SURF_P2) |
2156                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2157                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159                                PIPE_CONFIG(ADDR_SURF_P2) |
2160                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2161                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2163                                PIPE_CONFIG(ADDR_SURF_P2));
2164                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165                                PIPE_CONFIG(ADDR_SURF_P2) |
2166                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2171                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2175                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2177                                 PIPE_CONFIG(ADDR_SURF_P2) |
2178                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2) |
2182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2185                                 PIPE_CONFIG(ADDR_SURF_P2) |
2186                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2187                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189                                 PIPE_CONFIG(ADDR_SURF_P2) |
2190                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2193                                 PIPE_CONFIG(ADDR_SURF_P2) |
2194                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2197                                 PIPE_CONFIG(ADDR_SURF_P2) |
2198                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2199                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2201                                 PIPE_CONFIG(ADDR_SURF_P2) |
2202                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2205                                 PIPE_CONFIG(ADDR_SURF_P2) |
2206                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2209                                 PIPE_CONFIG(ADDR_SURF_P2) |
2210                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2211                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2213                                 PIPE_CONFIG(ADDR_SURF_P2) |
2214                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2215                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2217                                 PIPE_CONFIG(ADDR_SURF_P2) |
2218                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2221                                 PIPE_CONFIG(ADDR_SURF_P2) |
2222                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225                                 PIPE_CONFIG(ADDR_SURF_P2) |
2226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2227                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229                                 PIPE_CONFIG(ADDR_SURF_P2) |
2230                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2231                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2232                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P2) |
2234                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2235                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2236
2237                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2238                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                NUM_BANKS(ADDR_SURF_8_BANK));
2241                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2242                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244                                NUM_BANKS(ADDR_SURF_8_BANK));
2245                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2246                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248                                NUM_BANKS(ADDR_SURF_8_BANK));
2249                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                NUM_BANKS(ADDR_SURF_8_BANK));
2253                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2255                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256                                NUM_BANKS(ADDR_SURF_8_BANK));
2257                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2259                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260                                NUM_BANKS(ADDR_SURF_8_BANK));
2261                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                NUM_BANKS(ADDR_SURF_8_BANK));
2265                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2266                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2267                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                NUM_BANKS(ADDR_SURF_16_BANK));
2269                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2270                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2271                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272                                NUM_BANKS(ADDR_SURF_16_BANK));
2273                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276                                 NUM_BANKS(ADDR_SURF_16_BANK));
2277                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2278                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2280                                 NUM_BANKS(ADDR_SURF_16_BANK));
2281                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2283                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284                                 NUM_BANKS(ADDR_SURF_16_BANK));
2285                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2287                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                 NUM_BANKS(ADDR_SURF_16_BANK));
2289                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2291                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2292                                 NUM_BANKS(ADDR_SURF_8_BANK));
2293
2294                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2295                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2296                            reg_offset != 23)
2297                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2298
2299                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2300                        if (reg_offset != 7)
2301                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2302
2303                break;
2304        case CHIP_FIJI:
2305        case CHIP_VEGAM:
2306                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2309                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2313                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2317                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2321                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2325                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2329                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2333                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2336                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2339                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2340                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2343                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2347                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2357                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2365                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2377                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2381                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2389                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2393                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2397                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2398                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2401                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2405                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2409                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428
2429                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432                                NUM_BANKS(ADDR_SURF_8_BANK));
2433                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                NUM_BANKS(ADDR_SURF_8_BANK));
2437                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                NUM_BANKS(ADDR_SURF_8_BANK));
2441                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                NUM_BANKS(ADDR_SURF_8_BANK));
2445                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2447                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448                                NUM_BANKS(ADDR_SURF_8_BANK));
2449                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                                NUM_BANKS(ADDR_SURF_8_BANK));
2453                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                NUM_BANKS(ADDR_SURF_8_BANK));
2457                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2459                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                                NUM_BANKS(ADDR_SURF_8_BANK));
2461                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                                NUM_BANKS(ADDR_SURF_8_BANK));
2465                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                 NUM_BANKS(ADDR_SURF_8_BANK));
2469                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                                 NUM_BANKS(ADDR_SURF_8_BANK));
2473                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2475                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                                 NUM_BANKS(ADDR_SURF_8_BANK));
2477                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                                 NUM_BANKS(ADDR_SURF_8_BANK));
2481                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                                 NUM_BANKS(ADDR_SURF_4_BANK));
2485
2486                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2487                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2488
2489                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2490                        if (reg_offset != 7)
2491                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2492
2493                break;
2494        case CHIP_TONGA:
2495                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2498                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2502                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2506                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2510                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2514                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2516                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2518                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2522                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2524                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2525                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2528                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2529                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2532                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2536                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2540                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2554                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2566                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2570                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2578                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2582                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2586                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2590                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2592                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2594                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2598                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2608                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617
2618                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621                                NUM_BANKS(ADDR_SURF_16_BANK));
2622                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                NUM_BANKS(ADDR_SURF_16_BANK));
2626                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                NUM_BANKS(ADDR_SURF_16_BANK));
2630                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                NUM_BANKS(ADDR_SURF_16_BANK));
2634                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2636                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2637                                NUM_BANKS(ADDR_SURF_16_BANK));
2638                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641                                NUM_BANKS(ADDR_SURF_16_BANK));
2642                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645                                NUM_BANKS(ADDR_SURF_16_BANK));
2646                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2648                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2649                                NUM_BANKS(ADDR_SURF_16_BANK));
2650                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2653                                NUM_BANKS(ADDR_SURF_16_BANK));
2654                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2656                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657                                 NUM_BANKS(ADDR_SURF_16_BANK));
2658                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2661                                 NUM_BANKS(ADDR_SURF_16_BANK));
2662                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665                                 NUM_BANKS(ADDR_SURF_8_BANK));
2666                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2669                                 NUM_BANKS(ADDR_SURF_4_BANK));
2670                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2673                                 NUM_BANKS(ADDR_SURF_4_BANK));
2674
2675                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2676                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2677
2678                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2679                        if (reg_offset != 7)
2680                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2681
2682                break;
2683        case CHIP_POLARIS11:
2684        case CHIP_POLARIS12:
2685                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2688                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2692                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2700                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2704                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2708                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2718                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
2719                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2744                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2756                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2760                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2768                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2772                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2776                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2782                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2784                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2788                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2794                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2798                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807
2808                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816                                NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2820                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2821                                NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2825                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2826                                NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831                                NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841                                NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2844                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2845                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851                                NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2860                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861                                NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2866                                NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2871                                NUM_BANKS(ADDR_SURF_8_BANK));
2872
2873                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2875                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2876                                NUM_BANKS(ADDR_SURF_4_BANK));
2877
2878                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2879                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2880
2881                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2882                        if (reg_offset != 7)
2883                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2884
2885                break;
2886        case CHIP_POLARIS10:
2887                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2890                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2894                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2898                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2902                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2921                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2946                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2958                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2962                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2970                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2974                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2978                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2979                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2986                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2990                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009
3010                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013                                NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023                                NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028                                NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033                                NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3038                                NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043                                NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3047                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048                                NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053                                NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3057                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058                                NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063                                NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068                                NUM_BANKS(ADDR_SURF_8_BANK));
3069
3070                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073                                NUM_BANKS(ADDR_SURF_4_BANK));
3074
3075                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3078                                NUM_BANKS(ADDR_SURF_4_BANK));
3079
3080                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3081                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3082
3083                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3084                        if (reg_offset != 7)
3085                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3086
3087                break;
3088        case CHIP_STONEY:
3089                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                PIPE_CONFIG(ADDR_SURF_P2) |
3091                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3092                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094                                PIPE_CONFIG(ADDR_SURF_P2) |
3095                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3096                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098                                PIPE_CONFIG(ADDR_SURF_P2) |
3099                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3100                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102                                PIPE_CONFIG(ADDR_SURF_P2) |
3103                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3104                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                PIPE_CONFIG(ADDR_SURF_P2) |
3107                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110                                PIPE_CONFIG(ADDR_SURF_P2) |
3111                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3112                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114                                PIPE_CONFIG(ADDR_SURF_P2) |
3115                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3118                                PIPE_CONFIG(ADDR_SURF_P2));
3119                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3120                                PIPE_CONFIG(ADDR_SURF_P2) |
3121                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124                                 PIPE_CONFIG(ADDR_SURF_P2) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3126                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3132                                 PIPE_CONFIG(ADDR_SURF_P2) |
3133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136                                 PIPE_CONFIG(ADDR_SURF_P2) |
3137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3140                                 PIPE_CONFIG(ADDR_SURF_P2) |
3141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144                                 PIPE_CONFIG(ADDR_SURF_P2) |
3145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3148                                 PIPE_CONFIG(ADDR_SURF_P2) |
3149                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3152                                 PIPE_CONFIG(ADDR_SURF_P2) |
3153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3156                                 PIPE_CONFIG(ADDR_SURF_P2) |
3157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3160                                 PIPE_CONFIG(ADDR_SURF_P2) |
3161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3164                                 PIPE_CONFIG(ADDR_SURF_P2) |
3165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3168                                 PIPE_CONFIG(ADDR_SURF_P2) |
3169                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3172                                 PIPE_CONFIG(ADDR_SURF_P2) |
3173                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3176                                 PIPE_CONFIG(ADDR_SURF_P2) |
3177                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180                                 PIPE_CONFIG(ADDR_SURF_P2) |
3181                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3184                                 PIPE_CONFIG(ADDR_SURF_P2) |
3185                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3186                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188                                 PIPE_CONFIG(ADDR_SURF_P2) |
3189                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191
3192                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195                                NUM_BANKS(ADDR_SURF_8_BANK));
3196                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3198                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                NUM_BANKS(ADDR_SURF_8_BANK));
3200                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                NUM_BANKS(ADDR_SURF_8_BANK));
3204                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207                                NUM_BANKS(ADDR_SURF_8_BANK));
3208                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211                                NUM_BANKS(ADDR_SURF_8_BANK));
3212                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215                                NUM_BANKS(ADDR_SURF_8_BANK));
3216                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219                                NUM_BANKS(ADDR_SURF_8_BANK));
3220                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3221                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3222                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                NUM_BANKS(ADDR_SURF_16_BANK));
3224                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3225                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3226                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                NUM_BANKS(ADDR_SURF_16_BANK));
3228                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                 NUM_BANKS(ADDR_SURF_16_BANK));
3232                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                 NUM_BANKS(ADDR_SURF_16_BANK));
3236                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239                                 NUM_BANKS(ADDR_SURF_16_BANK));
3240                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                 NUM_BANKS(ADDR_SURF_16_BANK));
3244                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247                                 NUM_BANKS(ADDR_SURF_8_BANK));
3248
3249                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3250                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251                            reg_offset != 23)
3252                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253
3254                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3255                        if (reg_offset != 7)
3256                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3257
3258                break;
3259        default:
3260                dev_warn(adev->dev,
3261                         "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3262                         adev->asic_type);
3263                fallthrough;
3264
3265        case CHIP_CARRIZO:
3266                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                                PIPE_CONFIG(ADDR_SURF_P2) |
3268                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3269                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271                                PIPE_CONFIG(ADDR_SURF_P2) |
3272                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3273                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275                                PIPE_CONFIG(ADDR_SURF_P2) |
3276                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3277                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279                                PIPE_CONFIG(ADDR_SURF_P2) |
3280                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3281                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                PIPE_CONFIG(ADDR_SURF_P2) |
3284                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3285                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3287                                PIPE_CONFIG(ADDR_SURF_P2) |
3288                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3289                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3291                                PIPE_CONFIG(ADDR_SURF_P2) |
3292                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3293                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3295                                PIPE_CONFIG(ADDR_SURF_P2));
3296                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297                                PIPE_CONFIG(ADDR_SURF_P2) |
3298                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3299                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3301                                 PIPE_CONFIG(ADDR_SURF_P2) |
3302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3303                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305                                 PIPE_CONFIG(ADDR_SURF_P2) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3307                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309                                 PIPE_CONFIG(ADDR_SURF_P2) |
3310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313                                 PIPE_CONFIG(ADDR_SURF_P2) |
3314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3315                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3317                                 PIPE_CONFIG(ADDR_SURF_P2) |
3318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3319                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3320                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3321                                 PIPE_CONFIG(ADDR_SURF_P2) |
3322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3324                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3325                                 PIPE_CONFIG(ADDR_SURF_P2) |
3326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3329                                 PIPE_CONFIG(ADDR_SURF_P2) |
3330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3331                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3333                                 PIPE_CONFIG(ADDR_SURF_P2) |
3334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3337                                 PIPE_CONFIG(ADDR_SURF_P2) |
3338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3341                                 PIPE_CONFIG(ADDR_SURF_P2) |
3342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345                                 PIPE_CONFIG(ADDR_SURF_P2) |
3346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3347                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3349                                 PIPE_CONFIG(ADDR_SURF_P2) |
3350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3353                                 PIPE_CONFIG(ADDR_SURF_P2) |
3354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3357                                 PIPE_CONFIG(ADDR_SURF_P2) |
3358                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3359                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3360                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3361                                 PIPE_CONFIG(ADDR_SURF_P2) |
3362                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3363                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3364                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3365                                 PIPE_CONFIG(ADDR_SURF_P2) |
3366                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3367                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3368
3369                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3371                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3372                                NUM_BANKS(ADDR_SURF_8_BANK));
3373                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3375                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376                                NUM_BANKS(ADDR_SURF_8_BANK));
3377                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380                                NUM_BANKS(ADDR_SURF_8_BANK));
3381                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3383                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3384                                NUM_BANKS(ADDR_SURF_8_BANK));
3385                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3388                                NUM_BANKS(ADDR_SURF_8_BANK));
3389                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392                                NUM_BANKS(ADDR_SURF_8_BANK));
3393                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                NUM_BANKS(ADDR_SURF_8_BANK));
3397                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3398                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3399                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400                                NUM_BANKS(ADDR_SURF_16_BANK));
3401                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3402                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3403                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404                                NUM_BANKS(ADDR_SURF_16_BANK));
3405                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3408                                 NUM_BANKS(ADDR_SURF_16_BANK));
3409                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412                                 NUM_BANKS(ADDR_SURF_16_BANK));
3413                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                 NUM_BANKS(ADDR_SURF_16_BANK));
3417                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                 NUM_BANKS(ADDR_SURF_16_BANK));
3421                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3424                                 NUM_BANKS(ADDR_SURF_8_BANK));
3425
3426                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3427                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3428                            reg_offset != 23)
3429                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3430
3431                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3432                        if (reg_offset != 7)
3433                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3434
3435                break;
3436        }
3437}
3438
3439static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3440                                  u32 se_num, u32 sh_num, u32 instance)
3441{
3442        u32 data;
3443
3444        if (instance == 0xffffffff)
3445                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3446        else
3447                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3448
3449        if (se_num == 0xffffffff)
3450                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3451        else
3452                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3453
3454        if (sh_num == 0xffffffff)
3455                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3456        else
3457                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3458
3459        WREG32(mmGRBM_GFX_INDEX, data);
3460}
3461
3462static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3463                                  u32 me, u32 pipe, u32 q, u32 vm)
3464{
3465        vi_srbm_select(adev, me, pipe, q, vm);
3466}
3467
3468static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3469{
3470        u32 data, mask;
3471
3472        data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3473                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3474
3475        data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3476
3477        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3478                                         adev->gfx.config.max_sh_per_se);
3479
3480        return (~data) & mask;
3481}
3482
3483static void
3484gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3485{
3486        switch (adev->asic_type) {
3487        case CHIP_FIJI:
3488        case CHIP_VEGAM:
3489                *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3490                          RB_XSEL2(1) | PKR_MAP(2) |
3491                          PKR_XSEL(1) | PKR_YSEL(1) |
3492                          SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3493                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3494                           SE_PAIR_YSEL(2);
3495                break;
3496        case CHIP_TONGA:
3497        case CHIP_POLARIS10:
3498                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3499                          SE_XSEL(1) | SE_YSEL(1);
3500                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3501                           SE_PAIR_YSEL(2);
3502                break;
3503        case CHIP_TOPAZ:
3504        case CHIP_CARRIZO:
3505                *rconf |= RB_MAP_PKR0(2);
3506                *rconf1 |= 0x0;
3507                break;
3508        case CHIP_POLARIS11:
3509        case CHIP_POLARIS12:
3510                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3511                          SE_XSEL(1) | SE_YSEL(1);
3512                *rconf1 |= 0x0;
3513                break;
3514        case CHIP_STONEY:
3515                *rconf |= 0x0;
3516                *rconf1 |= 0x0;
3517                break;
3518        default:
3519                DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3520                break;
3521        }
3522}
3523
3524static void
3525gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3526                                        u32 raster_config, u32 raster_config_1,
3527                                        unsigned rb_mask, unsigned num_rb)
3528{
3529        unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3530        unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3531        unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3532        unsigned rb_per_se = num_rb / num_se;
3533        unsigned se_mask[4];
3534        unsigned se;
3535
3536        se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3537        se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3538        se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3539        se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3540
3541        WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3542        WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3543        WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3544
3545        if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3546                             (!se_mask[2] && !se_mask[3]))) {
3547                raster_config_1 &= ~SE_PAIR_MAP_MASK;
3548
3549                if (!se_mask[0] && !se_mask[1]) {
3550                        raster_config_1 |=
3551                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3552                } else {
3553                        raster_config_1 |=
3554                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3555                }
3556        }
3557
3558        for (se = 0; se < num_se; se++) {
3559                unsigned raster_config_se = raster_config;
3560                unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3561                unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3562                int idx = (se / 2) * 2;
3563
3564                if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3565                        raster_config_se &= ~SE_MAP_MASK;
3566
3567                        if (!se_mask[idx]) {
3568                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3569                        } else {
3570                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3571                        }
3572                }
3573
3574                pkr0_mask &= rb_mask;
3575                pkr1_mask &= rb_mask;
3576                if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3577                        raster_config_se &= ~PKR_MAP_MASK;
3578
3579                        if (!pkr0_mask) {
3580                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3581                        } else {
3582                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3583                        }
3584                }
3585
3586                if (rb_per_se >= 2) {
3587                        unsigned rb0_mask = 1 << (se * rb_per_se);
3588                        unsigned rb1_mask = rb0_mask << 1;
3589
3590                        rb0_mask &= rb_mask;
3591                        rb1_mask &= rb_mask;
3592                        if (!rb0_mask || !rb1_mask) {
3593                                raster_config_se &= ~RB_MAP_PKR0_MASK;
3594
3595                                if (!rb0_mask) {
3596                                        raster_config_se |=
3597                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3598                                } else {
3599                                        raster_config_se |=
3600                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3601                                }
3602                        }
3603
3604                        if (rb_per_se > 2) {
3605                                rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3606                                rb1_mask = rb0_mask << 1;
3607                                rb0_mask &= rb_mask;
3608                                rb1_mask &= rb_mask;
3609                                if (!rb0_mask || !rb1_mask) {
3610                                        raster_config_se &= ~RB_MAP_PKR1_MASK;
3611
3612                                        if (!rb0_mask) {
3613                                                raster_config_se |=
3614                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3615                                        } else {
3616                                                raster_config_se |=
3617                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3618                                        }
3619                                }
3620                        }
3621                }
3622
3623                /* GRBM_GFX_INDEX has a different offset on VI */
3624                gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3625                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3626                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3627        }
3628
3629        /* GRBM_GFX_INDEX has a different offset on VI */
3630        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3631}
3632
3633static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3634{
3635        int i, j;
3636        u32 data;
3637        u32 raster_config = 0, raster_config_1 = 0;
3638        u32 active_rbs = 0;
3639        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3640                                        adev->gfx.config.max_sh_per_se;
3641        unsigned num_rb_pipes;
3642
3643        mutex_lock(&adev->grbm_idx_mutex);
3644        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3645                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3646                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3647                        data = gfx_v8_0_get_rb_active_bitmap(adev);
3648                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3649                                               rb_bitmap_width_per_sh);
3650                }
3651        }
3652        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3653
3654        adev->gfx.config.backend_enable_mask = active_rbs;
3655        adev->gfx.config.num_rbs = hweight32(active_rbs);
3656
3657        num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3658                             adev->gfx.config.max_shader_engines, 16);
3659
3660        gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3661
3662        if (!adev->gfx.config.backend_enable_mask ||
3663                        adev->gfx.config.num_rbs >= num_rb_pipes) {
3664                WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3665                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3666        } else {
3667                gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3668                                                        adev->gfx.config.backend_enable_mask,
3669                                                        num_rb_pipes);
3670        }
3671
3672        /* cache the values for userspace */
3673        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3674                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3675                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3676                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
3677                                RREG32(mmCC_RB_BACKEND_DISABLE);
3678                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3679                                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3680                        adev->gfx.config.rb_config[i][j].raster_config =
3681                                RREG32(mmPA_SC_RASTER_CONFIG);
3682                        adev->gfx.config.rb_config[i][j].raster_config_1 =
3683                                RREG32(mmPA_SC_RASTER_CONFIG_1);
3684                }
3685        }
3686        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3687        mutex_unlock(&adev->grbm_idx_mutex);
3688}
3689
3690#define DEFAULT_SH_MEM_BASES    (0x6000)
3691/**
3692 * gfx_v8_0_init_compute_vmid - gart enable
3693 *
3694 * @adev: amdgpu_device pointer
3695 *
3696 * Initialize compute vmid sh_mem registers
3697 *
3698 */
3699static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3700{
3701        int i;
3702        uint32_t sh_mem_config;
3703        uint32_t sh_mem_bases;
3704
3705        /*
3706         * Configure apertures:
3707         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3708         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3709         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3710         */
3711        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3712
3713        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3714                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3715                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3716                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3717                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3718                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3719
3720        mutex_lock(&adev->srbm_mutex);
3721        for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3722                vi_srbm_select(adev, 0, 0, 0, i);
3723                /* CP and shaders */
3724                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3725                WREG32(mmSH_MEM_APE1_BASE, 1);
3726                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3727                WREG32(mmSH_MEM_BASES, sh_mem_bases);
3728        }
3729        vi_srbm_select(adev, 0, 0, 0, 0);
3730        mutex_unlock(&adev->srbm_mutex);
3731
3732        /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3733           acccess. These should be enabled by FW for target VMIDs. */
3734        for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3735                WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3736                WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3737                WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3738                WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3739        }
3740}
3741
3742static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3743{
3744        int vmid;
3745
3746        /*
3747         * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3748         * access. Compute VMIDs should be enabled by FW for target VMIDs,
3749         * the driver can enable them for graphics. VMID0 should maintain
3750         * access so that HWS firmware can save/restore entries.
3751         */
3752        for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3753                WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3754                WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3755                WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3756                WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3757        }
3758}
3759
3760static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3761{
3762        switch (adev->asic_type) {
3763        default:
3764                adev->gfx.config.double_offchip_lds_buf = 1;
3765                break;
3766        case CHIP_CARRIZO:
3767        case CHIP_STONEY:
3768                adev->gfx.config.double_offchip_lds_buf = 0;
3769                break;
3770        }
3771}
3772
3773static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3774{
3775        u32 tmp, sh_static_mem_cfg;
3776        int i;
3777
3778        WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3779        WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3780        WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3781        WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3782
3783        gfx_v8_0_tiling_mode_table_init(adev);
3784        gfx_v8_0_setup_rb(adev);
3785        gfx_v8_0_get_cu_info(adev);
3786        gfx_v8_0_config_init(adev);
3787
3788        /* XXX SH_MEM regs */
3789        /* where to put LDS, scratch, GPUVM in FSA64 space */
3790        sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3791                                   SWIZZLE_ENABLE, 1);
3792        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3793                                   ELEMENT_SIZE, 1);
3794        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3795                                   INDEX_STRIDE, 3);
3796        WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3797
3798        mutex_lock(&adev->srbm_mutex);
3799        for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3800                vi_srbm_select(adev, 0, 0, 0, i);
3801                /* CP and shaders */
3802                if (i == 0) {
3803                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3804                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3805                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3806                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3807                        WREG32(mmSH_MEM_CONFIG, tmp);
3808                        WREG32(mmSH_MEM_BASES, 0);
3809                } else {
3810                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3811                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3812                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3813                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3814                        WREG32(mmSH_MEM_CONFIG, tmp);
3815                        tmp = adev->gmc.shared_aperture_start >> 48;
3816                        WREG32(mmSH_MEM_BASES, tmp);
3817                }
3818
3819                WREG32(mmSH_MEM_APE1_BASE, 1);
3820                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3821        }
3822        vi_srbm_select(adev, 0, 0, 0, 0);
3823        mutex_unlock(&adev->srbm_mutex);
3824
3825        gfx_v8_0_init_compute_vmid(adev);
3826        gfx_v8_0_init_gds_vmid(adev);
3827
3828        mutex_lock(&adev->grbm_idx_mutex);
3829        /*
3830         * making sure that the following register writes will be broadcasted
3831         * to all the shaders
3832         */
3833        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3834
3835        WREG32(mmPA_SC_FIFO_SIZE,
3836                   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3837                        PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3838                   (adev->gfx.config.sc_prim_fifo_size_backend <<
3839                        PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3840                   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3841                        PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3842                   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3843                        PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3844
3845        tmp = RREG32(mmSPI_ARB_PRIORITY);
3846        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3847        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3848        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3849        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3850        WREG32(mmSPI_ARB_PRIORITY, tmp);
3851
3852        mutex_unlock(&adev->grbm_idx_mutex);
3853
3854}
3855
3856static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3857{
3858        u32 i, j, k;
3859        u32 mask;
3860
3861        mutex_lock(&adev->grbm_idx_mutex);
3862        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3863                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3864                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3865                        for (k = 0; k < adev->usec_timeout; k++) {
3866                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3867                                        break;
3868                                udelay(1);
3869                        }
3870                        if (k == adev->usec_timeout) {
3871                                gfx_v8_0_select_se_sh(adev, 0xffffffff,
3872                                                      0xffffffff, 0xffffffff);
3873                                mutex_unlock(&adev->grbm_idx_mutex);
3874                                DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3875                                         i, j);
3876                                return;
3877                        }
3878                }
3879        }
3880        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3881        mutex_unlock(&adev->grbm_idx_mutex);
3882
3883        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3884                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3885                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3886                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3887        for (k = 0; k < adev->usec_timeout; k++) {
3888                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3889                        break;
3890                udelay(1);
3891        }
3892}
3893
3894static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3895                                               bool enable)
3896{
3897        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3898
3899        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3900        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3901        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3902        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3903
3904        WREG32(mmCP_INT_CNTL_RING0, tmp);
3905}
3906
3907static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3908{
3909        adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3910        /* csib */
3911        WREG32(mmRLC_CSIB_ADDR_HI,
3912                        adev->gfx.rlc.clear_state_gpu_addr >> 32);
3913        WREG32(mmRLC_CSIB_ADDR_LO,
3914                        adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3915        WREG32(mmRLC_CSIB_LENGTH,
3916                        adev->gfx.rlc.clear_state_size);
3917}
3918
3919static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3920                                int ind_offset,
3921                                int list_size,
3922                                int *unique_indices,
3923                                int *indices_count,
3924                                int max_indices,
3925                                int *ind_start_offsets,
3926                                int *offset_count,
3927                                int max_offset)
3928{
3929        int indices;
3930        bool new_entry = true;
3931
3932        for (; ind_offset < list_size; ind_offset++) {
3933
3934                if (new_entry) {
3935                        new_entry = false;
3936                        ind_start_offsets[*offset_count] = ind_offset;
3937                        *offset_count = *offset_count + 1;
3938                        BUG_ON(*offset_count >= max_offset);
3939                }
3940
3941                if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3942                        new_entry = true;
3943                        continue;
3944                }
3945
3946                ind_offset += 2;
3947
3948                /* look for the matching indice */
3949                for (indices = 0;
3950                        indices < *indices_count;
3951                        indices++) {
3952                        if (unique_indices[indices] ==
3953                                register_list_format[ind_offset])
3954                                break;
3955                }
3956
3957                if (indices >= *indices_count) {
3958                        unique_indices[*indices_count] =
3959                                register_list_format[ind_offset];
3960                        indices = *indices_count;
3961                        *indices_count = *indices_count + 1;
3962                        BUG_ON(*indices_count >= max_indices);
3963                }
3964
3965                register_list_format[ind_offset] = indices;
3966        }
3967}
3968
3969static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3970{
3971        int i, temp, data;
3972        int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3973        int indices_count = 0;
3974        int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3975        int offset_count = 0;
3976
3977        int list_size;
3978        unsigned int *register_list_format =
3979                kmemdup(adev->gfx.rlc.register_list_format,
3980                        adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3981        if (!register_list_format)
3982                return -ENOMEM;
3983
3984        gfx_v8_0_parse_ind_reg_list(register_list_format,
3985                                RLC_FormatDirectRegListLength,
3986                                adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3987                                unique_indices,
3988                                &indices_count,
3989                                ARRAY_SIZE(unique_indices),
3990                                indirect_start_offsets,
3991                                &offset_count,
3992                                ARRAY_SIZE(indirect_start_offsets));
3993
3994        /* save and restore list */
3995        WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3996
3997        WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3998        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3999                WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4000
4001        /* indirect list */
4002        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4003        for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4004                WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4005
4006        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4007        list_size = list_size >> 1;
4008        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4009        WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4010
4011        /* starting offsets starts */
4012        WREG32(mmRLC_GPM_SCRATCH_ADDR,
4013                adev->gfx.rlc.starting_offsets_start);
4014        for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4015                WREG32(mmRLC_GPM_SCRATCH_DATA,
4016                                indirect_start_offsets[i]);
4017
4018        /* unique indices */
4019        temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4020        data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4021        for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4022                if (unique_indices[i] != 0) {
4023                        WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4024                        WREG32(data + i, unique_indices[i] >> 20);
4025                }
4026        }
4027        kfree(register_list_format);
4028
4029        return 0;
4030}
4031
4032static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4033{
4034        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4035}
4036
4037static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4038{
4039        uint32_t data;
4040
4041        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4042
4043        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4044        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4045        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4046        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4047        WREG32(mmRLC_PG_DELAY, data);
4048
4049        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4050        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4051
4052}
4053
4054static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4055                                                bool enable)
4056{
4057        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4058}
4059
4060static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4061                                                  bool enable)
4062{
4063        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4064}
4065
4066static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4067{
4068        WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4069}
4070
4071static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4072{
4073        if ((adev->asic_type == CHIP_CARRIZO) ||
4074            (adev->asic_type == CHIP_STONEY)) {
4075                gfx_v8_0_init_csb(adev);
4076                gfx_v8_0_init_save_restore_list(adev);
4077                gfx_v8_0_enable_save_restore_machine(adev);
4078                WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4079                gfx_v8_0_init_power_gating(adev);
4080                WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4081        } else if ((adev->asic_type == CHIP_POLARIS11) ||
4082                   (adev->asic_type == CHIP_POLARIS12) ||
4083                   (adev->asic_type == CHIP_VEGAM)) {
4084                gfx_v8_0_init_csb(adev);
4085                gfx_v8_0_init_save_restore_list(adev);
4086                gfx_v8_0_enable_save_restore_machine(adev);
4087                gfx_v8_0_init_power_gating(adev);
4088        }
4089
4090}
4091
/*
 * Stop the RLC: clear the RLC_ENABLE_F32 field of RLC_CNTL, disable the GUI
 * idle interrupt and then call gfx_v8_0_wait_for_rlc_serdes().
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
        gfx_v8_0_wait_for_rlc_serdes(adev);
}
4099
/*
 * Pulse the RLC soft reset: set SOFT_RESET_RLC in GRBM_SOFT_RESET, wait
 * 50us, clear it again and wait another 50us.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
        /* assert the reset bit */
        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
        udelay(50);

        /* release the reset bit */
        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
        udelay(50);
}
4108
4109static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4110{
4111        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4112
4113        /* carrizo do enable cp interrupt after cp inited */
4114        if (!(adev->flags & AMD_IS_APU))
4115                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4116
4117        udelay(50);
4118}
4119
4120static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4121{
4122        if (amdgpu_sriov_vf(adev)) {
4123                gfx_v8_0_init_csb(adev);
4124                return 0;
4125        }
4126
4127        adev->gfx.rlc.funcs->stop(adev);
4128        adev->gfx.rlc.funcs->reset(adev);
4129        gfx_v8_0_init_pg(adev);
4130        adev->gfx.rlc.funcs->start(adev);
4131
4132        return 0;
4133}
4134
4135static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4136{
4137        u32 tmp = RREG32(mmCP_ME_CNTL);
4138
4139        if (enable) {
4140                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4141                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4142                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4143        } else {
4144                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4145                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4146                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4147        }
4148        WREG32(mmCP_ME_CNTL, tmp);
4149        udelay(50);
4150}
4151
4152static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4153{
4154        u32 count = 0;
4155        const struct cs_section_def *sect = NULL;
4156        const struct cs_extent_def *ext = NULL;
4157
4158        /* begin clear state */
4159        count += 2;
4160        /* context control state */
4161        count += 3;
4162
4163        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4164                for (ext = sect->section; ext->extent != NULL; ++ext) {
4165                        if (sect->id == SECT_CONTEXT)
4166                                count += 2 + ext->reg_count;
4167                        else
4168                                return 0;
4169                }
4170        }
4171        /* pa_sc_raster_config/pa_sc_raster_config1 */
4172        count += 4;
4173        /* end clear state */
4174        count += 2;
4175        /* clear state */
4176        count += 2;
4177
4178        return count;
4179}
4180
4181static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4182{
4183        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4184        const struct cs_section_def *sect = NULL;
4185        const struct cs_extent_def *ext = NULL;
4186        int r, i;
4187
4188        /* init the CP */
4189        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4190        WREG32(mmCP_ENDIAN_SWAP, 0);
4191        WREG32(mmCP_DEVICE_ID, 1);
4192
4193        gfx_v8_0_cp_gfx_enable(adev, true);
4194
4195        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4196        if (r) {
4197                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4198                return r;
4199        }
4200
4201        /* clear state buffer */
4202        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4203        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4204
4205        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4206        amdgpu_ring_write(ring, 0x80000000);
4207        amdgpu_ring_write(ring, 0x80000000);
4208
4209        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4210                for (ext = sect->section; ext->extent != NULL; ++ext) {
4211                        if (sect->id == SECT_CONTEXT) {
4212                                amdgpu_ring_write(ring,
4213                                       PACKET3(PACKET3_SET_CONTEXT_REG,
4214                                               ext->reg_count));
4215                                amdgpu_ring_write(ring,
4216                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4217                                for (i = 0; i < ext->reg_count; i++)
4218                                        amdgpu_ring_write(ring, ext->extent[i]);
4219                        }
4220                }
4221        }
4222
4223        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4224        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4225        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4226        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4227
4228        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4229        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4230
4231        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4232        amdgpu_ring_write(ring, 0);
4233
4234        /* init the CE partitions */
4235        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4236        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4237        amdgpu_ring_write(ring, 0x8000);
4238        amdgpu_ring_write(ring, 0x8000);
4239
4240        amdgpu_ring_commit(ring);
4241
4242        return 0;
4243}
4244static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4245{
4246        u32 tmp;
4247        /* no gfx doorbells on iceland */
4248        if (adev->asic_type == CHIP_TOPAZ)
4249                return;
4250
4251        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4252
4253        if (ring->use_doorbell) {
4254                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4255                                DOORBELL_OFFSET, ring->doorbell_index);
4256                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4257                                                DOORBELL_HIT, 0);
4258                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4259                                            DOORBELL_EN, 1);
4260        } else {
4261                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4262        }
4263
4264        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4265
4266        if (adev->flags & AMD_IS_APU)
4267                return;
4268
4269        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4270                                        DOORBELL_RANGE_LOWER,
4271                                        adev->doorbell_index.gfx_ring0);
4272        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4273
4274        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4275                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4276}
4277
4278static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4279{
4280        struct amdgpu_ring *ring;
4281        u32 tmp;
4282        u32 rb_bufsz;
4283        u64 rb_addr, rptr_addr, wptr_gpu_addr;
4284
4285        /* Set the write pointer delay */
4286        WREG32(mmCP_RB_WPTR_DELAY, 0);
4287
4288        /* set the RB to use vmid 0 */
4289        WREG32(mmCP_RB_VMID, 0);
4290
4291        /* Set ring buffer size */
4292        ring = &adev->gfx.gfx_ring[0];
4293        rb_bufsz = order_base_2(ring->ring_size / 8);
4294        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4295        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4296        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4297        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4298#ifdef __BIG_ENDIAN
4299        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4300#endif
4301        WREG32(mmCP_RB0_CNTL, tmp);
4302
4303        /* Initialize the ring buffer's read and write pointers */
4304        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4305        ring->wptr = 0;
4306        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4307
4308        /* set the wb address wether it's enabled or not */
4309        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4310        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4311        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4312
4313        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4314        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4315        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4316        mdelay(1);
4317        WREG32(mmCP_RB0_CNTL, tmp);
4318
4319        rb_addr = ring->gpu_addr >> 8;
4320        WREG32(mmCP_RB0_BASE, rb_addr);
4321        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4322
4323        gfx_v8_0_set_cpg_door_bell(adev, ring);
4324        /* start the ring */
4325        amdgpu_ring_clear_ring(ring);
4326        gfx_v8_0_cp_gfx_start(adev);
4327        ring->sched.ready = true;
4328
4329        return 0;
4330}
4331
4332static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4333{
4334        if (enable) {
4335                WREG32(mmCP_MEC_CNTL, 0);
4336        } else {
4337                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4338                adev->gfx.kiq.ring.sched.ready = false;
4339        }
4340        udelay(50);
4341}
4342
4343/* KIQ functions */
4344static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4345{
4346        uint32_t tmp;
4347        struct amdgpu_device *adev = ring->adev;
4348
4349        /* tell RLC which is KIQ queue */
4350        tmp = RREG32(mmRLC_CP_SCHEDULERS);
4351        tmp &= 0xffffff00;
4352        tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4353        WREG32(mmRLC_CP_SCHEDULERS, tmp);
4354        tmp |= 0x80;
4355        WREG32(mmRLC_CP_SCHEDULERS, tmp);
4356}
4357
4358static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4359{
4360        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4361        uint64_t queue_mask = 0;
4362        int r, i;
4363
4364        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4365                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4366                        continue;
4367
4368                /* This situation may be hit in the future if a new HW
4369                 * generation exposes more than 64 queues. If so, the
4370                 * definition of queue_mask needs updating */
4371                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4372                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4373                        break;
4374                }
4375
4376                queue_mask |= (1ull << i);
4377        }
4378
4379        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4380        if (r) {
4381                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4382                return r;
4383        }
4384        /* set resources */
4385        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4386        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4387        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4388        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4389        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4390        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4391        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4392        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4393        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4394                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4395                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4396                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4397
4398                /* map queues */
4399                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4400                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4401                amdgpu_ring_write(kiq_ring,
4402                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4403                amdgpu_ring_write(kiq_ring,
4404                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4405                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4406                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4407                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4408                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4409                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4410                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4411                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4412        }
4413
4414        amdgpu_ring_commit(kiq_ring);
4415
4416        return 0;
4417}
4418
4419static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4420{
4421        int i, r = 0;
4422
4423        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4424                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4425                for (i = 0; i < adev->usec_timeout; i++) {
4426                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4427                                break;
4428                        udelay(1);
4429                }
4430                if (i == adev->usec_timeout)
4431                        r = -ETIMEDOUT;
4432        }
4433        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4434        WREG32(mmCP_HQD_PQ_RPTR, 0);
4435        WREG32(mmCP_HQD_PQ_WPTR, 0);
4436
4437        return r;
4438}
4439
4440static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4441{
4442        struct amdgpu_device *adev = ring->adev;
4443
4444        if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4445                if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe,
4446                                                              ring->queue)) {
4447                        mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4448                        mqd->cp_hqd_queue_priority =
4449                                AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4450                }
4451        }
4452}
4453
/*
 * Fill in the memory queue descriptor (MQD) of @ring.
 *
 * Populates ring->mqd_ptr (a struct vi_mqd) with fixed settings, values
 * derived from the ring (EOP, MQD and ring buffer GPU addresses, writeback
 * addresses, doorbell setup) and the current contents of several CP_MQD_*
 * and CP_HQD_* registers. The function only reads registers; it does not
 * write any. It also resets ring->wptr to 0.
 *
 * The callers in this file select the ring's me/pipe/queue with
 * vi_srbm_select() under srbm_mutex before calling - presumably so that the
 * register reads below refer to that queue; confirm against vi_srbm_select().
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
        /* GPU address of the dynamic_cu_mask member of the MQD allocation */
        mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        /* EOP base is stored shifted right by 8 */
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                            CP_HQD_PQ_DOORBELL_CONTROL,
                            DOORBELL_EN,
                            ring->use_doorbell ? 1 : 0);

        /*
         * NOTE: cp_hqd_pq_doorbell_control is assigned again further down,
         * so the value stored here does not survive.
         */
        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32(mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        /*
         * NOTE(review): the value is shifted left by 8 before it is handed
         * to REG_SET_FIELD - confirm this matches the field's encoding.
         */
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        /* without a doorbell the doorbell control word is stored as 0 */
        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_wptr = ring->wptr;
        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MTYPE */
        tmp = RREG32(mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ib_control = tmp;

        tmp = RREG32(mmCP_HQD_IQ_TIMER);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
        mqd->cp_hqd_iq_timer = tmp;

        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ctx_save_control = tmp;

        /* defaults: copy the current register contents into the MQD */
        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

        /* set static priority for a queue/ring */
        gfx_v8_0_mqd_set_priority(ring, mqd);
        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

        /* map_queues packet doesn't need activate the queue,
         * so only kiq need set this field.
         */
        if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
                mqd->cp_hqd_active = 1;

        return 0;
}
4600
4601static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4602                        struct vi_mqd *mqd)
4603{
4604        uint32_t mqd_reg;
4605        uint32_t *mqd_data;
4606
4607        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4608        mqd_data = &mqd->cp_mqd_base_addr_lo;
4609
4610        /* disable wptr polling */
4611        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4612
4613        /* program all HQD registers */
4614        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4615                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4616
4617        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4618         * This is safe since EOP RPTR==WPTR for any inactive HQD
4619         * on ASICs that do not support context-save.
4620         * EOP writes/reads can start anywhere in the ring.
4621         */
4622        if (adev->asic_type != CHIP_TONGA) {
4623                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4624                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4625                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4626        }
4627
4628        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4629                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4630
4631        /* activate the HQD */
4632        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4633                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4634
4635        return 0;
4636}
4637
4638static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4639{
4640        struct amdgpu_device *adev = ring->adev;
4641        struct vi_mqd *mqd = ring->mqd_ptr;
4642        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4643
4644        gfx_v8_0_kiq_setting(ring);
4645
4646        if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4647                /* reset MQD to a clean status */
4648                if (adev->gfx.mec.mqd_backup[mqd_idx])
4649                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4650
4651                /* reset ring buffer */
4652                ring->wptr = 0;
4653                amdgpu_ring_clear_ring(ring);
4654                mutex_lock(&adev->srbm_mutex);
4655                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4656                gfx_v8_0_mqd_commit(adev, mqd);
4657                vi_srbm_select(adev, 0, 0, 0, 0);
4658                mutex_unlock(&adev->srbm_mutex);
4659        } else {
4660                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4661                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4662                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4663                mutex_lock(&adev->srbm_mutex);
4664                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4665                gfx_v8_0_mqd_init(ring);
4666                gfx_v8_0_mqd_commit(adev, mqd);
4667                vi_srbm_select(adev, 0, 0, 0, 0);
4668                mutex_unlock(&adev->srbm_mutex);
4669
4670                if (adev->gfx.mec.mqd_backup[mqd_idx])
4671                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4672        }
4673
4674        return 0;
4675}
4676
4677static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4678{
4679        struct amdgpu_device *adev = ring->adev;
4680        struct vi_mqd *mqd = ring->mqd_ptr;
4681        int mqd_idx = ring - &adev->gfx.compute_ring[0];
4682
4683        if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4684                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4685                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4686                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4687                mutex_lock(&adev->srbm_mutex);
4688                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4689                gfx_v8_0_mqd_init(ring);
4690                vi_srbm_select(adev, 0, 0, 0, 0);
4691                mutex_unlock(&adev->srbm_mutex);
4692
4693                if (adev->gfx.mec.mqd_backup[mqd_idx])
4694                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4695        } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4696                /* reset MQD to a clean status */
4697                if (adev->gfx.mec.mqd_backup[mqd_idx])
4698                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4699                /* reset ring buffer */
4700                ring->wptr = 0;
4701                amdgpu_ring_clear_ring(ring);
4702        } else {
4703                amdgpu_ring_clear_ring(ring);
4704        }
4705        return 0;
4706}
4707
4708static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4709{
4710        if (adev->asic_type > CHIP_TONGA) {
4711                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4712                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4713        }
4714        /* enable doorbells */
4715        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4716}
4717
4718static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4719{
4720        struct amdgpu_ring *ring;
4721        int r;
4722
4723        ring = &adev->gfx.kiq.ring;
4724
4725        r = amdgpu_bo_reserve(ring->mqd_obj, false);
4726        if (unlikely(r != 0))
4727                return r;
4728
4729        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4730        if (unlikely(r != 0))
4731                return r;
4732
4733        gfx_v8_0_kiq_init_queue(ring);
4734        amdgpu_bo_kunmap(ring->mqd_obj);
4735        ring->mqd_ptr = NULL;
4736        amdgpu_bo_unreserve(ring->mqd_obj);
4737        ring->sched.ready = true;
4738        return 0;
4739}
4740
4741static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4742{
4743        struct amdgpu_ring *ring = NULL;
4744        int r = 0, i;
4745
4746        gfx_v8_0_cp_compute_enable(adev, true);
4747
4748        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4749                ring = &adev->gfx.compute_ring[i];
4750
4751                r = amdgpu_bo_reserve(ring->mqd_obj, false);
4752                if (unlikely(r != 0))
4753                        goto done;
4754                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4755                if (!r) {
4756                        r = gfx_v8_0_kcq_init_queue(ring);
4757                        amdgpu_bo_kunmap(ring->mqd_obj);
4758                        ring->mqd_ptr = NULL;
4759                }
4760                amdgpu_bo_unreserve(ring->mqd_obj);
4761                if (r)
4762                        goto done;
4763        }
4764
4765        gfx_v8_0_set_mec_doorbell_range(adev);
4766
4767        r = gfx_v8_0_kiq_kcq_enable(adev);
4768        if (r)
4769                goto done;
4770
4771done:
4772        return r;
4773}
4774
4775static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4776{
4777        int r, i;
4778        struct amdgpu_ring *ring;
4779
4780        /* collect all the ring_tests here, gfx, kiq, compute */
4781        ring = &adev->gfx.gfx_ring[0];
4782        r = amdgpu_ring_test_helper(ring);
4783        if (r)
4784                return r;
4785
4786        ring = &adev->gfx.kiq.ring;
4787        r = amdgpu_ring_test_helper(ring);
4788        if (r)
4789                return r;
4790
4791        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4792                ring = &adev->gfx.compute_ring[i];
4793                amdgpu_ring_test_helper(ring);
4794        }
4795
4796        return 0;
4797}
4798
4799static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4800{
4801        int r;
4802
4803        if (!(adev->flags & AMD_IS_APU))
4804                gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4805
4806        r = gfx_v8_0_kiq_resume(adev);
4807        if (r)
4808                return r;
4809
4810        r = gfx_v8_0_cp_gfx_resume(adev);
4811        if (r)
4812                return r;
4813
4814        r = gfx_v8_0_kcq_resume(adev);
4815        if (r)
4816                return r;
4817
4818        r = gfx_v8_0_cp_test_all_rings(adev);
4819        if (r)
4820                return r;
4821
4822        gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4823
4824        return 0;
4825}
4826
/*
 * Enable or disable both halves of the command processor: the gfx engines
 * first, then the compute engines.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
4832
4833static int gfx_v8_0_hw_init(void *handle)
4834{
4835        int r;
4836        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4837
4838        gfx_v8_0_init_golden_registers(adev);
4839        gfx_v8_0_constants_init(adev);
4840
4841        r = adev->gfx.rlc.funcs->resume(adev);
4842        if (r)
4843                return r;
4844
4845        r = gfx_v8_0_cp_resume(adev);
4846
4847        return r;
4848}
4849
4850static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4851{
4852        int r, i;
4853        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4854
4855        r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4856        if (r)
4857                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4858
4859        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4860                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4861
4862                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4863                amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4864                                                PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4865                                                PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4866                                                PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4867                                                PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4868                amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4869                amdgpu_ring_write(kiq_ring, 0);
4870                amdgpu_ring_write(kiq_ring, 0);
4871                amdgpu_ring_write(kiq_ring, 0);
4872        }
4873        r = amdgpu_ring_test_helper(kiq_ring);
4874        if (r)
4875                DRM_ERROR("KCQ disable failed\n");
4876
4877        return r;
4878}
4879
4880static bool gfx_v8_0_is_idle(void *handle)
4881{
4882        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4883
4884        if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4885                || RREG32(mmGRBM_STATUS2) != 0x8)
4886                return false;
4887        else
4888                return true;
4889}
4890
4891static bool gfx_v8_0_rlc_is_idle(void *handle)
4892{
4893        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4894
4895        if (RREG32(mmGRBM_STATUS2) != 0x8)
4896                return false;
4897        else
4898                return true;
4899}
4900
4901static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4902{
4903        unsigned int i;
4904        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4905
4906        for (i = 0; i < adev->usec_timeout; i++) {
4907                if (gfx_v8_0_rlc_is_idle(handle))
4908                        return 0;
4909
4910                udelay(1);
4911        }
4912        return -ETIMEDOUT;
4913}
4914
4915static int gfx_v8_0_wait_for_idle(void *handle)
4916{
4917        unsigned int i;
4918        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4919
4920        for (i = 0; i < adev->usec_timeout; i++) {
4921                if (gfx_v8_0_is_idle(handle))
4922                        return 0;
4923
4924                udelay(1);
4925        }
4926        return -ETIMEDOUT;
4927}
4928
4929static int gfx_v8_0_hw_fini(void *handle)
4930{
4931        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4932
4933        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4934        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4935
4936        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4937
4938        amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4939
4940        /* disable KCQ to avoid CPC touch memory not valid anymore */
4941        gfx_v8_0_kcq_disable(adev);
4942
4943        if (amdgpu_sriov_vf(adev)) {
4944                pr_debug("For SRIOV client, shouldn't do anything.\n");
4945                return 0;
4946        }
4947        amdgpu_gfx_rlc_enter_safe_mode(adev);
4948        if (!gfx_v8_0_wait_for_idle(adev))
4949                gfx_v8_0_cp_enable(adev, false);
4950        else
4951                pr_err("cp is busy, skip halt cp\n");
4952        if (!gfx_v8_0_wait_for_rlc_idle(adev))
4953                adev->gfx.rlc.funcs->stop(adev);
4954        else
4955                pr_err("rlc is busy, skip halt rlc\n");
4956        amdgpu_gfx_rlc_exit_safe_mode(adev);
4957
4958        return 0;
4959}
4960
/* IP-block suspend callback: identical to hw_fini for this block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4965
/* IP-block resume callback: identical to hw_init for this block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4970
4971static bool gfx_v8_0_check_soft_reset(void *handle)
4972{
4973        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4974        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4975        u32 tmp;
4976
4977        /* GRBM_STATUS */
4978        tmp = RREG32(mmGRBM_STATUS);
4979        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4980                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4981                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4982                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4983                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4984                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4985                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4986                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4987                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4988                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4989                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4990                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4991                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4992        }
4993
4994        /* GRBM_STATUS2 */
4995        tmp = RREG32(mmGRBM_STATUS2);
4996        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4997                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4998                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4999
5000        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5001            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5002            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5003                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5004                                                SOFT_RESET_CPF, 1);
5005                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5006                                                SOFT_RESET_CPC, 1);
5007                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5008                                                SOFT_RESET_CPG, 1);
5009                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5010                                                SOFT_RESET_GRBM, 1);
5011        }
5012
5013        /* SRBM_STATUS */
5014        tmp = RREG32(mmSRBM_STATUS);
5015        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5016                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5017                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5018        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5019                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5020                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5021
5022        if (grbm_soft_reset || srbm_soft_reset) {
5023                adev->gfx.grbm_soft_reset = grbm_soft_reset;
5024                adev->gfx.srbm_soft_reset = srbm_soft_reset;
5025                return true;
5026        } else {
5027                adev->gfx.grbm_soft_reset = 0;
5028                adev->gfx.srbm_soft_reset = 0;
5029                return false;
5030        }
5031}
5032
5033static int gfx_v8_0_pre_soft_reset(void *handle)
5034{
5035        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5036        u32 grbm_soft_reset = 0;
5037
5038        if ((!adev->gfx.grbm_soft_reset) &&
5039            (!adev->gfx.srbm_soft_reset))
5040                return 0;
5041
5042        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5043
5044        /* stop the rlc */
5045        adev->gfx.rlc.funcs->stop(adev);
5046
5047        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5048            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5049                /* Disable GFX parsing/prefetching */
5050                gfx_v8_0_cp_gfx_enable(adev, false);
5051
5052        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5053            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5054            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5055            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5056                int i;
5057
5058                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5059                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5060
5061                        mutex_lock(&adev->srbm_mutex);
5062                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5063                        gfx_v8_0_deactivate_hqd(adev, 2);
5064                        vi_srbm_select(adev, 0, 0, 0, 0);
5065                        mutex_unlock(&adev->srbm_mutex);
5066                }
5067                /* Disable MEC parsing/prefetching */
5068                gfx_v8_0_cp_compute_enable(adev, false);
5069        }
5070
5071        return 0;
5072}
5073
5074static int gfx_v8_0_soft_reset(void *handle)
5075{
5076        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5077        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5078        u32 tmp;
5079
5080        if ((!adev->gfx.grbm_soft_reset) &&
5081            (!adev->gfx.srbm_soft_reset))
5082                return 0;
5083
5084        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5085        srbm_soft_reset = adev->gfx.srbm_soft_reset;
5086
5087        if (grbm_soft_reset || srbm_soft_reset) {
5088                tmp = RREG32(mmGMCON_DEBUG);
5089                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5090                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5091                WREG32(mmGMCON_DEBUG, tmp);
5092                udelay(50);
5093        }
5094
5095        if (grbm_soft_reset) {
5096                tmp = RREG32(mmGRBM_SOFT_RESET);
5097                tmp |= grbm_soft_reset;
5098                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5099                WREG32(mmGRBM_SOFT_RESET, tmp);
5100                tmp = RREG32(mmGRBM_SOFT_RESET);
5101
5102                udelay(50);
5103
5104                tmp &= ~grbm_soft_reset;
5105                WREG32(mmGRBM_SOFT_RESET, tmp);
5106                tmp = RREG32(mmGRBM_SOFT_RESET);
5107        }
5108
5109        if (srbm_soft_reset) {
5110                tmp = RREG32(mmSRBM_SOFT_RESET);
5111                tmp |= srbm_soft_reset;
5112                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5113                WREG32(mmSRBM_SOFT_RESET, tmp);
5114                tmp = RREG32(mmSRBM_SOFT_RESET);
5115
5116                udelay(50);
5117
5118                tmp &= ~srbm_soft_reset;
5119                WREG32(mmSRBM_SOFT_RESET, tmp);
5120                tmp = RREG32(mmSRBM_SOFT_RESET);
5121        }
5122
5123        if (grbm_soft_reset || srbm_soft_reset) {
5124                tmp = RREG32(mmGMCON_DEBUG);
5125                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5126                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5127                WREG32(mmGMCON_DEBUG, tmp);
5128        }
5129
5130        /* Wait a little for things to settle down */
5131        udelay(50);
5132
5133        return 0;
5134}
5135
5136static int gfx_v8_0_post_soft_reset(void *handle)
5137{
5138        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5139        u32 grbm_soft_reset = 0;
5140
5141        if ((!adev->gfx.grbm_soft_reset) &&
5142            (!adev->gfx.srbm_soft_reset))
5143                return 0;
5144
5145        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5146
5147        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5148            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5149            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5150            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5151                int i;
5152
5153                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5154                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5155
5156                        mutex_lock(&adev->srbm_mutex);
5157                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5158                        gfx_v8_0_deactivate_hqd(adev, 2);
5159                        vi_srbm_select(adev, 0, 0, 0, 0);
5160                        mutex_unlock(&adev->srbm_mutex);
5161                }
5162                gfx_v8_0_kiq_resume(adev);
5163                gfx_v8_0_kcq_resume(adev);
5164        }
5165
5166        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5167            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5168                gfx_v8_0_cp_gfx_resume(adev);
5169
5170        gfx_v8_0_cp_test_all_rings(adev);
5171
5172        adev->gfx.rlc.funcs->start(adev);
5173
5174        return 0;
5175}
5176
5177/**
5178 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5179 *
5180 * @adev: amdgpu_device pointer
5181 *
5182 * Fetches a GPU clock counter snapshot.
5183 * Returns the 64 bit clock counter snapshot.
5184 */
5185static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5186{
5187        uint64_t clock;
5188
5189        mutex_lock(&adev->gfx.gpu_clock_mutex);
5190        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5191        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5192                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5193        mutex_unlock(&adev->gfx.gpu_clock_mutex);
5194        return clock;
5195}
5196
5197static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5198                                          uint32_t vmid,
5199                                          uint32_t gds_base, uint32_t gds_size,
5200                                          uint32_t gws_base, uint32_t gws_size,
5201                                          uint32_t oa_base, uint32_t oa_size)
5202{
5203        /* GDS Base */
5204        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5205        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5206                                WRITE_DATA_DST_SEL(0)));
5207        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5208        amdgpu_ring_write(ring, 0);
5209        amdgpu_ring_write(ring, gds_base);
5210
5211        /* GDS Size */
5212        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5213        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5214                                WRITE_DATA_DST_SEL(0)));
5215        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5216        amdgpu_ring_write(ring, 0);
5217        amdgpu_ring_write(ring, gds_size);
5218
5219        /* GWS */
5220        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5221        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5222                                WRITE_DATA_DST_SEL(0)));
5223        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5224        amdgpu_ring_write(ring, 0);
5225        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5226
5227        /* OA */
5228        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5229        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5230                                WRITE_DATA_DST_SEL(0)));
5231        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5232        amdgpu_ring_write(ring, 0);
5233        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5234}
5235
5236static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5237{
5238        WREG32(mmSQ_IND_INDEX,
5239                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5240                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5241                (address << SQ_IND_INDEX__INDEX__SHIFT) |
5242                (SQ_IND_INDEX__FORCE_READ_MASK));
5243        return RREG32(mmSQ_IND_DATA);
5244}
5245
5246static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5247                           uint32_t wave, uint32_t thread,
5248                           uint32_t regno, uint32_t num, uint32_t *out)
5249{
5250        WREG32(mmSQ_IND_INDEX,
5251                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5252                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5253                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5254                (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5255                (SQ_IND_INDEX__FORCE_READ_MASK) |
5256                (SQ_IND_INDEX__AUTO_INCR_MASK));
5257        while (num--)
5258                *(out++) = RREG32(mmSQ_IND_DATA);
5259}
5260
5261static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5262{
5263        /* type 0 wave data */
5264        dst[(*no_fields)++] = 0;
5265        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5266        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5267        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5268        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5269        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5270        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5271        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5272        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5273        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5274        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5275        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5276        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5277        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5278        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5279        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5280        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5281        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5282        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5283}
5284
5285static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5286                                     uint32_t wave, uint32_t start,
5287                                     uint32_t size, uint32_t *dst)
5288{
5289        wave_read_regs(
5290                adev, simd, wave, 0,
5291                start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5292}
5293
5294
5295static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5296        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5297        .select_se_sh = &gfx_v8_0_select_se_sh,
5298        .read_wave_data = &gfx_v8_0_read_wave_data,
5299        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5300        .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5301};
5302
5303static int gfx_v8_0_early_init(void *handle)
5304{
5305        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5306
5307        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5308        adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5309                                          AMDGPU_MAX_COMPUTE_RINGS);
5310        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5311        gfx_v8_0_set_ring_funcs(adev);
5312        gfx_v8_0_set_irq_funcs(adev);
5313        gfx_v8_0_set_gds_init(adev);
5314        gfx_v8_0_set_rlc_funcs(adev);
5315
5316        return 0;
5317}
5318
5319static int gfx_v8_0_late_init(void *handle)
5320{
5321        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5322        int r;
5323
5324        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5325        if (r)
5326                return r;
5327
5328        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5329        if (r)
5330                return r;
5331
5332        /* requires IBs so do in late init after IB pool is initialized */
5333        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5334        if (r)
5335                return r;
5336
5337        r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5338        if (r) {
5339                DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5340                return r;
5341        }
5342
5343        r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5344        if (r) {
5345                DRM_ERROR(
5346                        "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5347                        r);
5348                return r;
5349        }
5350
5351        return 0;
5352}
5353
5354static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5355                                                       bool enable)
5356{
5357        if ((adev->asic_type == CHIP_POLARIS11) ||
5358            (adev->asic_type == CHIP_POLARIS12) ||
5359            (adev->asic_type == CHIP_VEGAM))
5360                /* Send msg to SMU via Powerplay */
5361                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5362
5363        WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5364}
5365
5366static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5367                                                        bool enable)
5368{
5369        WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5370}
5371
5372static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5373                bool enable)
5374{
5375        WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5376}
5377
5378static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5379                                          bool enable)
5380{
5381        WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5382}
5383
5384static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5385                                                bool enable)
5386{
5387        WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5388
5389        /* Read any GFX register to wake up GFX. */
5390        if (!enable)
5391                RREG32(mmDB_RENDER_CONTROL);
5392}
5393
5394static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5395                                          bool enable)
5396{
5397        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5398                cz_enable_gfx_cg_power_gating(adev, true);
5399                if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5400                        cz_enable_gfx_pipeline_power_gating(adev, true);
5401        } else {
5402                cz_enable_gfx_cg_power_gating(adev, false);
5403                cz_enable_gfx_pipeline_power_gating(adev, false);
5404        }
5405}
5406
5407static int gfx_v8_0_set_powergating_state(void *handle,
5408                                          enum amd_powergating_state state)
5409{
5410        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5411        bool enable = (state == AMD_PG_STATE_GATE);
5412
5413        if (amdgpu_sriov_vf(adev))
5414                return 0;
5415
5416        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5417                                AMD_PG_SUPPORT_RLC_SMU_HS |
5418                                AMD_PG_SUPPORT_CP |
5419                                AMD_PG_SUPPORT_GFX_DMG))
5420                amdgpu_gfx_rlc_enter_safe_mode(adev);
5421        switch (adev->asic_type) {
5422        case CHIP_CARRIZO:
5423        case CHIP_STONEY:
5424
5425                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5426                        cz_enable_sck_slow_down_on_power_up(adev, true);
5427                        cz_enable_sck_slow_down_on_power_down(adev, true);
5428                } else {
5429                        cz_enable_sck_slow_down_on_power_up(adev, false);
5430                        cz_enable_sck_slow_down_on_power_down(adev, false);
5431                }
5432                if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5433                        cz_enable_cp_power_gating(adev, true);
5434                else
5435                        cz_enable_cp_power_gating(adev, false);
5436
5437                cz_update_gfx_cg_power_gating(adev, enable);
5438
5439                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5440                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5441                else
5442                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5443
5444                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5445                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5446                else
5447                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5448                break;
5449        case CHIP_POLARIS11:
5450        case CHIP_POLARIS12:
5451        case CHIP_VEGAM:
5452                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5453                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5454                else
5455                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5456
5457                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5458                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5459                else
5460                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5461
5462                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5463                        polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5464                else
5465                        polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5466                break;
5467        default:
5468                break;
5469        }
5470        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5471                                AMD_PG_SUPPORT_RLC_SMU_HS |
5472                                AMD_PG_SUPPORT_CP |
5473                                AMD_PG_SUPPORT_GFX_DMG))
5474                amdgpu_gfx_rlc_exit_safe_mode(adev);
5475        return 0;
5476}
5477
5478static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5479{
5480        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5481        int data;
5482
5483        if (amdgpu_sriov_vf(adev))
5484                *flags = 0;
5485
5486        /* AMD_CG_SUPPORT_GFX_MGCG */
5487        data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5488        if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5489                *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5490
5491        /* AMD_CG_SUPPORT_GFX_CGLG */
5492        data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5493        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5494                *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5495
5496        /* AMD_CG_SUPPORT_GFX_CGLS */
5497        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5498                *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5499
5500        /* AMD_CG_SUPPORT_GFX_CGTS */
5501        data = RREG32(mmCGTS_SM_CTRL_REG);
5502        if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5503                *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5504
5505        /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5506        if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5507                *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5508
5509        /* AMD_CG_SUPPORT_GFX_RLC_LS */
5510        data = RREG32(mmRLC_MEM_SLP_CNTL);
5511        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5512                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5513
5514        /* AMD_CG_SUPPORT_GFX_CP_LS */
5515        data = RREG32(mmCP_MEM_SLP_CNTL);
5516        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5517                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5518}
5519
5520static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5521                                     uint32_t reg_addr, uint32_t cmd)
5522{
5523        uint32_t data;
5524
5525        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5526
5527        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5528        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5529
5530        data = RREG32(mmRLC_SERDES_WR_CTRL);
5531        if (adev->asic_type == CHIP_STONEY)
5532                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5533                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5534                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5535                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5536                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5537                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5538                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5539                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5540                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5541        else
5542                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5543                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5544                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5545                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5546                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5547                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5548                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5549                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5550                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5551                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5552                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5553        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5554                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5555                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5556                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5557
5558        WREG32(mmRLC_SERDES_WR_CTRL, data);
5559}
5560
/* Presumably the message values for entering/leaving RLC safe mode. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
/* RLC_GPR_REG2 field layout: REQ is bit 0, MESSAGE occupies bits 1-4. */
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5567
5568static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5569{
5570        uint32_t rlc_setting;
5571
5572        rlc_setting = RREG32(mmRLC_CNTL);
5573        if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5574                return false;
5575
5576        return true;
5577}
5578
5579static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5580{
5581        uint32_t data;
5582        unsigned i;
5583        data = RREG32(mmRLC_CNTL);
5584        data |= RLC_SAFE_MODE__CMD_MASK;
5585        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5586        data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5587        WREG32(mmRLC_SAFE_MODE, data);
5588
5589        /* wait for RLC_SAFE_MODE */
5590        for (i = 0; i < adev->usec_timeout; i++) {
5591                if ((RREG32(mmRLC_GPM_STAT) &
5592                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5593                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5594                    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5595                     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5596                        break;
5597                udelay(1);
5598        }
5599        for (i = 0; i < adev->usec_timeout; i++) {
5600                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5601                        break;
5602                udelay(1);
5603        }
5604}
5605
5606static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5607{
5608        uint32_t data;
5609        unsigned i;
5610
5611        data = RREG32(mmRLC_CNTL);
5612        data |= RLC_SAFE_MODE__CMD_MASK;
5613        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5614        WREG32(mmRLC_SAFE_MODE, data);
5615
5616        for (i = 0; i < adev->usec_timeout; i++) {
5617                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5618                        break;
5619                udelay(1);
5620        }
5621}
5622
5623static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5624{
5625        u32 data;
5626
5627        if (amdgpu_sriov_is_pp_one_vf(adev))
5628                data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5629        else
5630                data = RREG32(mmRLC_SPM_VMID);
5631
5632        data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5633        data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5634
5635        if (amdgpu_sriov_is_pp_one_vf(adev))
5636                WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5637        else
5638                WREG32(mmRLC_SPM_VMID, data);
5639}
5640
5641static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5642        .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5643        .set_safe_mode = gfx_v8_0_set_safe_mode,
5644        .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5645        .init = gfx_v8_0_rlc_init,
5646        .get_csb_size = gfx_v8_0_get_csb_size,
5647        .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5648        .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5649        .resume = gfx_v8_0_rlc_resume,
5650        .stop = gfx_v8_0_rlc_stop,
5651        .reset = gfx_v8_0_rlc_reset,
5652        .start = gfx_v8_0_rlc_start,
5653        .update_spm_vmid = gfx_v8_0_update_spm_vmid
5654};
5655
/*
 * Enable or disable medium grain clock gating (MGCG) together with the
 * RLC/CP memory light sleep and CGTS (tree shade) settings.
 *
 * @adev:   amdgpu device
 * @enable: true to enable; the enable sequence is only taken when
 *          AMD_CG_SUPPORT_GFX_MGCG is set in adev->cg_flags, otherwise the
 *          disable sequence runs
 *
 * The whole sequence is bracketed by amdgpu_gfx_rlc_enter_safe_mode() and
 * amdgpu_gfx_rlc_exit_safe_mode().  Registers that are read into a
 * temp/data pair are only written back when the value actually changed.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        amdgpu_gfx_rlc_enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
                                /* 1 - RLC memory Light sleep */
                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

                        /* 2 - CP memory light sleep */
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                /* on APUs the GRBM override bit is left untouched */
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* the LS override is only dropped when both MGLS and CGTS_LS are supported */
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5759
/*
 * Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS).
 *
 * @adev:   amdgpu device
 * @enable: true to enable; the enable sequence is only taken when
 *          AMD_CG_SUPPORT_GFX_CGCG is set in adev->cg_flags, otherwise the
 *          disable sequence runs
 *
 * RLC_CGCG_CGLS_CTRL is sampled before RLC safe mode is entered; the
 * modified value is written back (only if it changed) inside the safe-mode
 * window.  Both paths finish by waiting for the RLC serdes to go idle
 * before leaving safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        amdgpu_gfx_rlc_enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* drop the CGCG override bit */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 2 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /*
                 * 3 - write cmd to set CGLS
                 * NOTE(review): sent regardless of AMD_CG_SUPPORT_GFX_CGLS,
                 * which is only checked further down - confirm intended.
                 */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 4 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls */
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);

                /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* TEST CGCG */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Set CGCG Override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
                /* enable interrupts again for PG */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        }

        gfx_v8_0_wait_for_rlc_serdes(adev);

        amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5852static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5853                                            bool enable)
5854{
5855        if (enable) {
5856                /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5857                 * ===  MGCG + MGLS + TS(CG/LS) ===
5858                 */
5859                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5860                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5861        } else {
5862                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5863                 * ===  CGCG + CGLS ===
5864                 */
5865                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5866                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5867        }
5868        return 0;
5869}
5870
5871static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5872                                          enum amd_clockgating_state state)
5873{
5874        uint32_t msg_id, pp_state = 0;
5875        uint32_t pp_support_state = 0;
5876
5877        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5878                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5879                        pp_support_state = PP_STATE_SUPPORT_LS;
5880                        pp_state = PP_STATE_LS;
5881                }
5882                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5883                        pp_support_state |= PP_STATE_SUPPORT_CG;
5884                        pp_state |= PP_STATE_CG;
5885                }
5886                if (state == AMD_CG_STATE_UNGATE)
5887                        pp_state = 0;
5888
5889                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5890                                PP_BLOCK_GFX_CG,
5891                                pp_support_state,
5892                                pp_state);
5893                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5894        }
5895
5896        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5897                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5898                        pp_support_state = PP_STATE_SUPPORT_LS;
5899                        pp_state = PP_STATE_LS;
5900                }
5901
5902                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5903                        pp_support_state |= PP_STATE_SUPPORT_CG;
5904                        pp_state |= PP_STATE_CG;
5905                }
5906
5907                if (state == AMD_CG_STATE_UNGATE)
5908                        pp_state = 0;
5909
5910                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5911                                PP_BLOCK_GFX_MG,
5912                                pp_support_state,
5913                                pp_state);
5914                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5915        }
5916
5917        return 0;
5918}
5919
5920static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5921                                          enum amd_clockgating_state state)
5922{
5923
5924        uint32_t msg_id, pp_state = 0;
5925        uint32_t pp_support_state = 0;
5926
5927        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5928                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5929                        pp_support_state = PP_STATE_SUPPORT_LS;
5930                        pp_state = PP_STATE_LS;
5931                }
5932                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5933                        pp_support_state |= PP_STATE_SUPPORT_CG;
5934                        pp_state |= PP_STATE_CG;
5935                }
5936                if (state == AMD_CG_STATE_UNGATE)
5937                        pp_state = 0;
5938
5939                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5940                                PP_BLOCK_GFX_CG,
5941                                pp_support_state,
5942                                pp_state);
5943                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5944        }
5945
5946        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5947                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5948                        pp_support_state = PP_STATE_SUPPORT_LS;
5949                        pp_state = PP_STATE_LS;
5950                }
5951                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5952                        pp_support_state |= PP_STATE_SUPPORT_CG;
5953                        pp_state |= PP_STATE_CG;
5954                }
5955                if (state == AMD_CG_STATE_UNGATE)
5956                        pp_state = 0;
5957
5958                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5959                                PP_BLOCK_GFX_3D,
5960                                pp_support_state,
5961                                pp_state);
5962                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5963        }
5964
5965        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5966                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5967                        pp_support_state = PP_STATE_SUPPORT_LS;
5968                        pp_state = PP_STATE_LS;
5969                }
5970
5971                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5972                        pp_support_state |= PP_STATE_SUPPORT_CG;
5973                        pp_state |= PP_STATE_CG;
5974                }
5975
5976                if (state == AMD_CG_STATE_UNGATE)
5977                        pp_state = 0;
5978
5979                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5980                                PP_BLOCK_GFX_MG,
5981                                pp_support_state,
5982                                pp_state);
5983                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5984        }
5985
5986        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5987                pp_support_state = PP_STATE_SUPPORT_LS;
5988
5989                if (state == AMD_CG_STATE_UNGATE)
5990                        pp_state = 0;
5991                else
5992                        pp_state = PP_STATE_LS;
5993
5994                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5995                                PP_BLOCK_GFX_RLC,
5996                                pp_support_state,
5997                                pp_state);
5998                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5999        }
6000
6001        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6002                pp_support_state = PP_STATE_SUPPORT_LS;
6003
6004                if (state == AMD_CG_STATE_UNGATE)
6005                        pp_state = 0;
6006                else
6007                        pp_state = PP_STATE_LS;
6008                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6009                        PP_BLOCK_GFX_CP,
6010                        pp_support_state,
6011                        pp_state);
6012                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6013        }
6014
6015        return 0;
6016}
6017
6018static int gfx_v8_0_set_clockgating_state(void *handle,
6019                                          enum amd_clockgating_state state)
6020{
6021        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6022
6023        if (amdgpu_sriov_vf(adev))
6024                return 0;
6025
6026        switch (adev->asic_type) {
6027        case CHIP_FIJI:
6028        case CHIP_CARRIZO:
6029        case CHIP_STONEY:
6030                gfx_v8_0_update_gfx_clock_gating(adev,
6031                                                 state == AMD_CG_STATE_GATE);
6032                break;
6033        case CHIP_TONGA:
6034                gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6035                break;
6036        case CHIP_POLARIS10:
6037        case CHIP_POLARIS11:
6038        case CHIP_POLARIS12:
6039        case CHIP_VEGAM:
6040                gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6041                break;
6042        default:
6043                break;
6044        }
6045        return 0;
6046}
6047
6048static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6049{
6050        return ring->adev->wb.wb[ring->rptr_offs];
6051}
6052
6053static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6054{
6055        struct amdgpu_device *adev = ring->adev;
6056
6057        if (ring->use_doorbell)
6058                /* XXX check if swapping is necessary on BE */
6059                return ring->adev->wb.wb[ring->wptr_offs];
6060        else
6061                return RREG32(mmCP_RB0_WPTR);
6062}
6063
6064static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6065{
6066        struct amdgpu_device *adev = ring->adev;
6067
6068        if (ring->use_doorbell) {
6069                /* XXX check if swapping is necessary on BE */
6070                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6071                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6072        } else {
6073                WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6074                (void)RREG32(mmCP_RB0_WPTR);
6075        }
6076}
6077
6078static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6079{
6080        u32 ref_and_mask, reg_mem_engine;
6081
6082        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6083            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6084                switch (ring->me) {
6085                case 1:
6086                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6087                        break;
6088                case 2:
6089                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6090                        break;
6091                default:
6092                        return;
6093                }
6094                reg_mem_engine = 0;
6095        } else {
6096                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6097                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6098        }
6099
6100        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6101        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6102                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6103                                 reg_mem_engine));
6104        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6105        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6106        amdgpu_ring_write(ring, ref_and_mask);
6107        amdgpu_ring_write(ring, ref_and_mask);
6108        amdgpu_ring_write(ring, 0x20); /* poll interval */
6109}
6110
6111static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6112{
6113        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6114        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6115                EVENT_INDEX(4));
6116
6117        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6118        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6119                EVENT_INDEX(0));
6120}
6121
6122static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6123                                        struct amdgpu_job *job,
6124                                        struct amdgpu_ib *ib,
6125                                        uint32_t flags)
6126{
6127        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6128        u32 header, control = 0;
6129
6130        if (ib->flags & AMDGPU_IB_FLAG_CE)
6131                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6132        else
6133                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6134
6135        control |= ib->length_dw | (vmid << 24);
6136
6137        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6138                control |= INDIRECT_BUFFER_PRE_ENB(1);
6139
6140                if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6141                        gfx_v8_0_ring_emit_de_meta(ring);
6142        }
6143
6144        amdgpu_ring_write(ring, header);
6145        amdgpu_ring_write(ring,
6146#ifdef __BIG_ENDIAN
6147                          (2 << 0) |
6148#endif
6149                          (ib->gpu_addr & 0xFFFFFFFC));
6150        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6151        amdgpu_ring_write(ring, control);
6152}
6153
6154static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6155                                          struct amdgpu_job *job,
6156                                          struct amdgpu_ib *ib,
6157                                          uint32_t flags)
6158{
6159        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6160        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6161
6162        /* Currently, there is a high possibility to get wave ID mismatch
6163         * between ME and GDS, leading to a hw deadlock, because ME generates
6164         * different wave IDs than the GDS expects. This situation happens
6165         * randomly when at least 5 compute pipes use GDS ordered append.
6166         * The wave IDs generated by ME are also wrong after suspend/resume.
6167         * Those are probably bugs somewhere else in the kernel driver.
6168         *
6169         * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6170         * GDS to 0 for this ring (me/pipe).
6171         */
6172        if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6173                amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6174                amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6175                amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6176        }
6177
6178        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6179        amdgpu_ring_write(ring,
6180#ifdef __BIG_ENDIAN
6181                                (2 << 0) |
6182#endif
6183                                (ib->gpu_addr & 0xFFFFFFFC));
6184        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6185        amdgpu_ring_write(ring, control);
6186}
6187
6188static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6189                                         u64 seq, unsigned flags)
6190{
6191        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6192        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6193
6194        /* Workaround for cache flush problems. First send a dummy EOP
6195         * event down the pipe with seq one below.
6196         */
6197        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6198        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6199                                 EOP_TC_ACTION_EN |
6200                                 EOP_TC_WB_ACTION_EN |
6201                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6202                                 EVENT_INDEX(5)));
6203        amdgpu_ring_write(ring, addr & 0xfffffffc);
6204        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6205                                DATA_SEL(1) | INT_SEL(0));
6206        amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6207        amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6208
6209        /* Then send the real EOP event down the pipe:
6210         * EVENT_WRITE_EOP - flush caches, send int */
6211        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6212        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6213                                 EOP_TC_ACTION_EN |
6214                                 EOP_TC_WB_ACTION_EN |
6215                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6216                                 EVENT_INDEX(5)));
6217        amdgpu_ring_write(ring, addr & 0xfffffffc);
6218        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6219                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6220        amdgpu_ring_write(ring, lower_32_bits(seq));
6221        amdgpu_ring_write(ring, upper_32_bits(seq));
6222
6223}
6224
6225static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6226{
6227        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6228        uint32_t seq = ring->fence_drv.sync_seq;
6229        uint64_t addr = ring->fence_drv.gpu_addr;
6230
6231        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6232        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6233                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6234                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6235        amdgpu_ring_write(ring, addr & 0xfffffffc);
6236        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6237        amdgpu_ring_write(ring, seq);
6238        amdgpu_ring_write(ring, 0xffffffff);
6239        amdgpu_ring_write(ring, 4); /* poll interval */
6240}
6241
6242static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6243                                        unsigned vmid, uint64_t pd_addr)
6244{
6245        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6246
6247        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6248
6249        /* wait for the invalidate to complete */
6250        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6251        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6252                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6253                                 WAIT_REG_MEM_ENGINE(0))); /* me */
6254        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6255        amdgpu_ring_write(ring, 0);
6256        amdgpu_ring_write(ring, 0); /* ref */
6257        amdgpu_ring_write(ring, 0); /* mask */
6258        amdgpu_ring_write(ring, 0x20); /* poll interval */
6259
6260        /* compute doesn't have PFP */
6261        if (usepfp) {
6262                /* sync PFP to ME, otherwise we might get invalid PFP reads */
6263                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6264                amdgpu_ring_write(ring, 0x0);
6265        }
6266}
6267
6268static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6269{
6270        return ring->adev->wb.wb[ring->wptr_offs];
6271}
6272
6273static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6274{
6275        struct amdgpu_device *adev = ring->adev;
6276
6277        /* XXX check if swapping is necessary on BE */
6278        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6279        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6280}
6281
6282static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6283                                             u64 addr, u64 seq,
6284                                             unsigned flags)
6285{
6286        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6287        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6288
6289        /* RELEASE_MEM - flush caches, send int */
6290        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6291        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6292                                 EOP_TC_ACTION_EN |
6293                                 EOP_TC_WB_ACTION_EN |
6294                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6295                                 EVENT_INDEX(5)));
6296        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6297        amdgpu_ring_write(ring, addr & 0xfffffffc);
6298        amdgpu_ring_write(ring, upper_32_bits(addr));
6299        amdgpu_ring_write(ring, lower_32_bits(seq));
6300        amdgpu_ring_write(ring, upper_32_bits(seq));
6301}
6302
6303static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6304                                         u64 seq, unsigned int flags)
6305{
6306        /* we only allocate 32bit for each seq wb address */
6307        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6308
6309        /* write fence seq to the "addr" */
6310        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6311        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6312                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6313        amdgpu_ring_write(ring, lower_32_bits(addr));
6314        amdgpu_ring_write(ring, upper_32_bits(addr));
6315        amdgpu_ring_write(ring, lower_32_bits(seq));
6316
6317        if (flags & AMDGPU_FENCE_FLAG_INT) {
6318                /* set register to trigger INT */
6319                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6320                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6321                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6322                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6323                amdgpu_ring_write(ring, 0);
6324                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6325        }
6326}
6327
6328static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6329{
6330        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6331        amdgpu_ring_write(ring, 0);
6332}
6333
6334static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6335{
6336        uint32_t dw2 = 0;
6337
6338        if (amdgpu_sriov_vf(ring->adev))
6339                gfx_v8_0_ring_emit_ce_meta(ring);
6340
6341        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6342        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6343                gfx_v8_0_ring_emit_vgt_flush(ring);
6344                /* set load_global_config & load_global_uconfig */
6345                dw2 |= 0x8001;
6346                /* set load_cs_sh_regs */
6347                dw2 |= 0x01000000;
6348                /* set load_per_context_state & load_gfx_sh_regs for GFX */
6349                dw2 |= 0x10002;
6350
6351                /* set load_ce_ram if preamble presented */
6352                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6353                        dw2 |= 0x10000000;
6354        } else {
6355                /* still load_ce_ram if this is the first time preamble presented
6356                 * although there is no context switch happens.
6357                 */
6358                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6359                        dw2 |= 0x10000000;
6360        }
6361
6362        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6363        amdgpu_ring_write(ring, dw2);
6364        amdgpu_ring_write(ring, 0);
6365}
6366
6367static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6368{
6369        unsigned ret;
6370
6371        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6372        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6373        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6374        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6375        ret = ring->wptr & ring->buf_mask;
6376        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6377        return ret;
6378}
6379
6380static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6381{
6382        unsigned cur;
6383
6384        BUG_ON(offset > ring->buf_mask);
6385        BUG_ON(ring->ring[offset] != 0x55aa55aa);
6386
6387        cur = (ring->wptr & ring->buf_mask) - 1;
6388        if (likely(cur > offset))
6389                ring->ring[offset] = cur - offset;
6390        else
6391                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6392}
6393
6394static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6395                                    uint32_t reg_val_offs)
6396{
6397        struct amdgpu_device *adev = ring->adev;
6398
6399        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6400        amdgpu_ring_write(ring, 0 |     /* src: register*/
6401                                (5 << 8) |      /* dst: memory */
6402                                (1 << 20));     /* write confirm */
6403        amdgpu_ring_write(ring, reg);
6404        amdgpu_ring_write(ring, 0);
6405        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6406                                reg_val_offs * 4));
6407        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6408                                reg_val_offs * 4));
6409}
6410
6411static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6412                                  uint32_t val)
6413{
6414        uint32_t cmd;
6415
6416        switch (ring->funcs->type) {
6417        case AMDGPU_RING_TYPE_GFX:
6418                cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6419                break;
6420        case AMDGPU_RING_TYPE_KIQ:
6421                cmd = 1 << 16; /* no inc addr */
6422                break;
6423        default:
6424                cmd = WR_CONFIRM;
6425                break;
6426        }
6427
6428        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6429        amdgpu_ring_write(ring, cmd);
6430        amdgpu_ring_write(ring, reg);
6431        amdgpu_ring_write(ring, 0);
6432        amdgpu_ring_write(ring, val);
6433}
6434
6435static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6436{
6437        struct amdgpu_device *adev = ring->adev;
6438        uint32_t value = 0;
6439
6440        value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6441        value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6442        value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6443        value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6444        WREG32(mmSQ_CMD, value);
6445}
6446
6447static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6448                                                 enum amdgpu_interrupt_state state)
6449{
6450        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6451                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6452}
6453
6454static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6455                                                     int me, int pipe,
6456                                                     enum amdgpu_interrupt_state state)
6457{
6458        u32 mec_int_cntl, mec_int_cntl_reg;
6459
6460        /*
6461         * amdgpu controls only the first MEC. That's why this function only
6462         * handles the setting of interrupts for this specific MEC. All other
6463         * pipes' interrupts are set by amdkfd.
6464         */
6465
6466        if (me == 1) {
6467                switch (pipe) {
6468                case 0:
6469                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6470                        break;
6471                case 1:
6472                        mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6473                        break;
6474                case 2:
6475                        mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6476                        break;
6477                case 3:
6478                        mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6479                        break;
6480                default:
6481                        DRM_DEBUG("invalid pipe %d\n", pipe);
6482                        return;
6483                }
6484        } else {
6485                DRM_DEBUG("invalid me %d\n", me);
6486                return;
6487        }
6488
6489        switch (state) {
6490        case AMDGPU_IRQ_STATE_DISABLE:
6491                mec_int_cntl = RREG32(mec_int_cntl_reg);
6492                mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6493                WREG32(mec_int_cntl_reg, mec_int_cntl);
6494                break;
6495        case AMDGPU_IRQ_STATE_ENABLE:
6496                mec_int_cntl = RREG32(mec_int_cntl_reg);
6497                mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6498                WREG32(mec_int_cntl_reg, mec_int_cntl);
6499                break;
6500        default:
6501                break;
6502        }
6503}
6504
6505static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6506                                             struct amdgpu_irq_src *source,
6507                                             unsigned type,
6508                                             enum amdgpu_interrupt_state state)
6509{
6510        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6511                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6512
6513        return 0;
6514}
6515
6516static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6517                                              struct amdgpu_irq_src *source,
6518                                              unsigned type,
6519                                              enum amdgpu_interrupt_state state)
6520{
6521        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6522                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6523
6524        return 0;
6525}
6526
6527static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6528                                            struct amdgpu_irq_src *src,
6529                                            unsigned type,
6530                                            enum amdgpu_interrupt_state state)
6531{
6532        switch (type) {
6533        case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6534                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6535                break;
6536        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6537                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6538                break;
6539        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6540                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6541                break;
6542        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6543                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6544                break;
6545        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6546                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6547                break;
6548        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6549                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6550                break;
6551        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6552                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6553                break;
6554        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6555                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6556                break;
6557        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6558                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6559                break;
6560        default:
6561                break;
6562        }
6563        return 0;
6564}
6565
6566static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6567                                         struct amdgpu_irq_src *source,
6568                                         unsigned int type,
6569                                         enum amdgpu_interrupt_state state)
6570{
6571        int enable_flag;
6572
6573        switch (state) {
6574        case AMDGPU_IRQ_STATE_DISABLE:
6575                enable_flag = 0;
6576                break;
6577
6578        case AMDGPU_IRQ_STATE_ENABLE:
6579                enable_flag = 1;
6580                break;
6581
6582        default:
6583                return -EINVAL;
6584        }
6585
6586        WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6587        WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6588        WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6589        WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6590        WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6591        WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592                     enable_flag);
6593        WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594                     enable_flag);
6595        WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6596                     enable_flag);
6597        WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6598                     enable_flag);
6599        WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6600                     enable_flag);
6601        WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6602                     enable_flag);
6603        WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6604                     enable_flag);
6605        WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6606                     enable_flag);
6607
6608        return 0;
6609}
6610
6611static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6612                                     struct amdgpu_irq_src *source,
6613                                     unsigned int type,
6614                                     enum amdgpu_interrupt_state state)
6615{
6616        int enable_flag;
6617
6618        switch (state) {
6619        case AMDGPU_IRQ_STATE_DISABLE:
6620                enable_flag = 1;
6621                break;
6622
6623        case AMDGPU_IRQ_STATE_ENABLE:
6624                enable_flag = 0;
6625                break;
6626
6627        default:
6628                return -EINVAL;
6629        }
6630
6631        WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6632                     enable_flag);
6633
6634        return 0;
6635}
6636
6637static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6638                            struct amdgpu_irq_src *source,
6639                            struct amdgpu_iv_entry *entry)
6640{
6641        int i;
6642        u8 me_id, pipe_id, queue_id;
6643        struct amdgpu_ring *ring;
6644
6645        DRM_DEBUG("IH: CP EOP\n");
6646        me_id = (entry->ring_id & 0x0c) >> 2;
6647        pipe_id = (entry->ring_id & 0x03) >> 0;
6648        queue_id = (entry->ring_id & 0x70) >> 4;
6649
6650        switch (me_id) {
6651        case 0:
6652                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6653                break;
6654        case 1:
6655        case 2:
6656                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6657                        ring = &adev->gfx.compute_ring[i];
6658                        /* Per-queue interrupt is supported for MEC starting from VI.
6659                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6660                          */
6661                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6662                                amdgpu_fence_process(ring);
6663                }
6664                break;
6665        }
6666        return 0;
6667}
6668
6669static void gfx_v8_0_fault(struct amdgpu_device *adev,
6670                           struct amdgpu_iv_entry *entry)
6671{
6672        u8 me_id, pipe_id, queue_id;
6673        struct amdgpu_ring *ring;
6674        int i;
6675
6676        me_id = (entry->ring_id & 0x0c) >> 2;
6677        pipe_id = (entry->ring_id & 0x03) >> 0;
6678        queue_id = (entry->ring_id & 0x70) >> 4;
6679
6680        switch (me_id) {
6681        case 0:
6682                drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6683                break;
6684        case 1:
6685        case 2:
6686                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6687                        ring = &adev->gfx.compute_ring[i];
6688                        if (ring->me == me_id && ring->pipe == pipe_id &&
6689                            ring->queue == queue_id)
6690                                drm_sched_fault(&ring->sched);
6691                }
6692                break;
6693        }
6694}
6695
/*
 * Privileged-register fault handler: log the error, then hand the IV
 * entry to gfx_v8_0_fault() so the owning ring's scheduler is notified.
 * Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6704
/*
 * Privileged-instruction fault handler: log the error, then hand the IV
 * entry to gfx_v8_0_fault() so the owning ring's scheduler is notified.
 * Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6713
/*
 * CP ECC error interrupt handler: only logs the event. Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the record so later messages don't get appended to it */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6721
6722static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6723{
6724        u32 enc, se_id, sh_id, cu_id;
6725        char type[20];
6726        int sq_edc_source = -1;
6727
6728        enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6729        se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6730
6731        switch (enc) {
6732                case 0:
6733                        DRM_INFO("SQ general purpose intr detected:"
6734                                        "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6735                                        "host_cmd_overflow %d, cmd_timestamp %d,"
6736                                        "reg_timestamp %d, thread_trace_buff_full %d,"
6737                                        "wlt %d, thread_trace %d.\n",
6738                                        se_id,
6739                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6740                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6741                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6742                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6743                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6744                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6745                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6746                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6747                                        );
6748                        break;
6749                case 1:
6750                case 2:
6751
6752                        cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6753                        sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6754
6755                        /*
6756                         * This function can be called either directly from ISR
6757                         * or from BH in which case we can access SQ_EDC_INFO
6758                         * instance
6759                         */
6760                        if (in_task()) {
6761                                mutex_lock(&adev->grbm_idx_mutex);
6762                                gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6763
6764                                sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6765
6766                                gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6767                                mutex_unlock(&adev->grbm_idx_mutex);
6768                        }
6769
6770                        if (enc == 1)
6771                                sprintf(type, "instruction intr");
6772                        else
6773                                sprintf(type, "EDC/ECC error");
6774
6775                        DRM_INFO(
6776                                "SQ %s detected: "
6777                                        "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6778                                        "trap %s, sq_ed_info.source %s.\n",
6779                                        type, se_id, sh_id, cu_id,
6780                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6781                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6782                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6783                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6784                                        (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6785                                );
6786                        break;
6787                default:
6788                        DRM_ERROR("SQ invalid encoding type\n.");
6789        }
6790}
6791
6792static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6793{
6794
6795        struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6796        struct sq_work *sq_work = container_of(work, struct sq_work, work);
6797
6798        gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6799}
6800
6801static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6802                           struct amdgpu_irq_src *source,
6803                           struct amdgpu_iv_entry *entry)
6804{
6805        unsigned ih_data = entry->src_data[0];
6806
6807        /*
6808         * Try to submit work so SQ_EDC_INFO can be accessed from
6809         * BH. If previous work submission hasn't finished yet
6810         * just print whatever info is possible directly from the ISR.
6811         */
6812        if (work_pending(&adev->gfx.sq_work.work)) {
6813                gfx_v8_0_parse_sq_irq(adev, ih_data);
6814        } else {
6815                adev->gfx.sq_work.ih_data = ih_data;
6816                schedule_work(&adev->gfx.sq_work.work);
6817        }
6818
6819        return 0;
6820}
6821
6822static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6823{
6824        amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6825        amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6826                          PACKET3_TC_ACTION_ENA |
6827                          PACKET3_SH_KCACHE_ACTION_ENA |
6828                          PACKET3_SH_ICACHE_ACTION_ENA |
6829                          PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6830        amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6831        amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6832        amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6833}
6834
6835static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6836{
6837        amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6838        amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6839                          PACKET3_TC_ACTION_ENA |
6840                          PACKET3_SH_KCACHE_ACTION_ENA |
6841                          PACKET3_SH_ICACHE_ACTION_ENA |
6842                          PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6843        amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
6844        amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
6845        amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
6846        amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
6847        amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
6848}
6849
6850static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6851        .name = "gfx_v8_0",
6852        .early_init = gfx_v8_0_early_init,
6853        .late_init = gfx_v8_0_late_init,
6854        .sw_init = gfx_v8_0_sw_init,
6855        .sw_fini = gfx_v8_0_sw_fini,
6856        .hw_init = gfx_v8_0_hw_init,
6857        .hw_fini = gfx_v8_0_hw_fini,
6858        .suspend = gfx_v8_0_suspend,
6859        .resume = gfx_v8_0_resume,
6860        .is_idle = gfx_v8_0_is_idle,
6861        .wait_for_idle = gfx_v8_0_wait_for_idle,
6862        .check_soft_reset = gfx_v8_0_check_soft_reset,
6863        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6864        .soft_reset = gfx_v8_0_soft_reset,
6865        .post_soft_reset = gfx_v8_0_post_soft_reset,
6866        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6867        .set_powergating_state = gfx_v8_0_set_powergating_state,
6868        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6869};
6870
6871static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6872        .type = AMDGPU_RING_TYPE_GFX,
6873        .align_mask = 0xff,
6874        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6875        .support_64bit_ptrs = false,
6876        .get_rptr = gfx_v8_0_ring_get_rptr,
6877        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6878        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6879        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6880                5 +  /* COND_EXEC */
6881                7 +  /* PIPELINE_SYNC */
6882                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6883                12 +  /* FENCE for VM_FLUSH */
6884                20 + /* GDS switch */
6885                4 + /* double SWITCH_BUFFER,
6886                       the first COND_EXEC jump to the place just
6887                           prior to this double SWITCH_BUFFER  */
6888                5 + /* COND_EXEC */
6889                7 +      /*     HDP_flush */
6890                4 +      /*     VGT_flush */
6891                14 + /* CE_META */
6892                31 + /* DE_META */
6893                3 + /* CNTX_CTRL */
6894                5 + /* HDP_INVL */
6895                12 + 12 + /* FENCE x2 */
6896                2 + /* SWITCH_BUFFER */
6897                5, /* SURFACE_SYNC */
6898        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6899        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6900        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6901        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6902        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6903        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6904        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6905        .test_ring = gfx_v8_0_ring_test_ring,
6906        .test_ib = gfx_v8_0_ring_test_ib,
6907        .insert_nop = amdgpu_ring_insert_nop,
6908        .pad_ib = amdgpu_ring_generic_pad_ib,
6909        .emit_switch_buffer = gfx_v8_ring_emit_sb,
6910        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6911        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6912        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6913        .emit_wreg = gfx_v8_0_ring_emit_wreg,
6914        .soft_recovery = gfx_v8_0_ring_soft_recovery,
6915        .emit_mem_sync = gfx_v8_0_emit_mem_sync,
6916};
6917
6918static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6919        .type = AMDGPU_RING_TYPE_COMPUTE,
6920        .align_mask = 0xff,
6921        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6922        .support_64bit_ptrs = false,
6923        .get_rptr = gfx_v8_0_ring_get_rptr,
6924        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6925        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6926        .emit_frame_size =
6927                20 + /* gfx_v8_0_ring_emit_gds_switch */
6928                7 + /* gfx_v8_0_ring_emit_hdp_flush */
6929                5 + /* hdp_invalidate */
6930                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6931                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6932                7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6933                7, /* gfx_v8_0_emit_mem_sync_compute */
6934        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6935        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6936        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6937        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6938        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6939        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6940        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6941        .test_ring = gfx_v8_0_ring_test_ring,
6942        .test_ib = gfx_v8_0_ring_test_ib,
6943        .insert_nop = amdgpu_ring_insert_nop,
6944        .pad_ib = amdgpu_ring_generic_pad_ib,
6945        .emit_wreg = gfx_v8_0_ring_emit_wreg,
6946        .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
6947};
6948
6949static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6950        .type = AMDGPU_RING_TYPE_KIQ,
6951        .align_mask = 0xff,
6952        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6953        .support_64bit_ptrs = false,
6954        .get_rptr = gfx_v8_0_ring_get_rptr,
6955        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6956        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6957        .emit_frame_size =
6958                20 + /* gfx_v8_0_ring_emit_gds_switch */
6959                7 + /* gfx_v8_0_ring_emit_hdp_flush */
6960                5 + /* hdp_invalidate */
6961                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6962                17 + /* gfx_v8_0_ring_emit_vm_flush */
6963                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6964        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6965        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6966        .test_ring = gfx_v8_0_ring_test_ring,
6967        .insert_nop = amdgpu_ring_insert_nop,
6968        .pad_ib = amdgpu_ring_generic_pad_ib,
6969        .emit_rreg = gfx_v8_0_ring_emit_rreg,
6970        .emit_wreg = gfx_v8_0_ring_emit_wreg,
6971};
6972
6973static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6974{
6975        int i;
6976
6977        adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6978
6979        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6980                adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6981
6982        for (i = 0; i < adev->gfx.num_compute_rings; i++)
6983                adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6984}
6985
6986static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6987        .set = gfx_v8_0_set_eop_interrupt_state,
6988        .process = gfx_v8_0_eop_irq,
6989};
6990
6991static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6992        .set = gfx_v8_0_set_priv_reg_fault_state,
6993        .process = gfx_v8_0_priv_reg_irq,
6994};
6995
6996static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6997        .set = gfx_v8_0_set_priv_inst_fault_state,
6998        .process = gfx_v8_0_priv_inst_irq,
6999};
7000
7001static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7002        .set = gfx_v8_0_set_cp_ecc_int_state,
7003        .process = gfx_v8_0_cp_ecc_error_irq,
7004};
7005
7006static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7007        .set = gfx_v8_0_set_sq_int_state,
7008        .process = gfx_v8_0_sq_irq,
7009};
7010
7011static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7012{
7013        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7014        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7015
7016        adev->gfx.priv_reg_irq.num_types = 1;
7017        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7018
7019        adev->gfx.priv_inst_irq.num_types = 1;
7020        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7021
7022        adev->gfx.cp_ecc_error_irq.num_types = 1;
7023        adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7024
7025        adev->gfx.sq_irq.num_types = 1;
7026        adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7027}
7028
7029static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7030{
7031        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7032}
7033
7034static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7035{
7036        /* init asci gds info */
7037        adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7038        adev->gds.gws_size = 64;
7039        adev->gds.oa_size = 16;
7040        adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7041}
7042
7043static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7044                                                 u32 bitmap)
7045{
7046        u32 data;
7047
7048        if (!bitmap)
7049                return;
7050
7051        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7052        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7053
7054        WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7055}
7056
7057static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7058{
7059        u32 data, mask;
7060
7061        data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7062                RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7063
7064        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7065
7066        return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7067}
7068
7069static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7070{
7071        int i, j, k, counter, active_cu_number = 0;
7072        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7073        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7074        unsigned disable_masks[4 * 2];
7075        u32 ao_cu_num;
7076
7077        memset(cu_info, 0, sizeof(*cu_info));
7078
7079        if (adev->flags & AMD_IS_APU)
7080                ao_cu_num = 2;
7081        else
7082                ao_cu_num = adev->gfx.config.max_cu_per_sh;
7083
7084        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7085
7086        mutex_lock(&adev->grbm_idx_mutex);
7087        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7088                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7089                        mask = 1;
7090                        ao_bitmap = 0;
7091                        counter = 0;
7092                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7093                        if (i < 4 && j < 2)
7094                                gfx_v8_0_set_user_cu_inactive_bitmap(
7095                                        adev, disable_masks[i * 2 + j]);
7096                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7097                        cu_info->bitmap[i][j] = bitmap;
7098
7099                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7100                                if (bitmap & mask) {
7101                                        if (counter < ao_cu_num)
7102                                                ao_bitmap |= mask;
7103                                        counter ++;
7104                                }
7105                                mask <<= 1;
7106                        }
7107                        active_cu_number += counter;
7108                        if (i < 2 && j < 2)
7109                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7110                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7111                }
7112        }
7113        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7114        mutex_unlock(&adev->grbm_idx_mutex);
7115
7116        cu_info->number = active_cu_number;
7117        cu_info->ao_cu_mask = ao_cu_mask;
7118        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7119        cu_info->max_waves_per_simd = 10;
7120        cu_info->max_scratch_slots_per_cu = 32;
7121        cu_info->wave_front_size = 64;
7122        cu_info->lds_size = 64;
7123}
7124
7125const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7126{
7127        .type = AMD_IP_BLOCK_TYPE_GFX,
7128        .major = 8,
7129        .minor = 0,
7130        .rev = 0,
7131        .funcs = &gfx_v8_0_ip_funcs,
7132};
7133
7134const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7135{
7136        .type = AMD_IP_BLOCK_TYPE_GFX,
7137        .major = 8,
7138        .minor = 1,
7139        .rev = 0,
7140        .funcs = &gfx_v8_0_ip_funcs,
7141};
7142
7143static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7144{
7145        uint64_t ce_payload_addr;
7146        int cnt_ce;
7147        union {
7148                struct vi_ce_ib_state regular;
7149                struct vi_ce_ib_state_chained_ib chained;
7150        } ce_payload = {};
7151
7152        if (ring->adev->virt.chained_ib_support) {
7153                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7154                        offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7155                cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7156        } else {
7157                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7158                        offsetof(struct vi_gfx_meta_data, ce_payload);
7159                cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7160        }
7161
7162        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7163        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7164                                WRITE_DATA_DST_SEL(8) |
7165                                WR_CONFIRM) |
7166                                WRITE_DATA_CACHE_POLICY(0));
7167        amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7168        amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7169        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7170}
7171
7172static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7173{
7174        uint64_t de_payload_addr, gds_addr, csa_addr;
7175        int cnt_de;
7176        union {
7177                struct vi_de_ib_state regular;
7178                struct vi_de_ib_state_chained_ib chained;
7179        } de_payload = {};
7180
7181        csa_addr = amdgpu_csa_vaddr(ring->adev);
7182        gds_addr = csa_addr + 4096;
7183        if (ring->adev->virt.chained_ib_support) {
7184                de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7185                de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7186                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7187                cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7188        } else {
7189                de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7190                de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7191                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7192                cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7193        }
7194
7195        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7196        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7197                                WRITE_DATA_DST_SEL(8) |
7198                                WR_CONFIRM) |
7199                                WRITE_DATA_CACHE_POLICY(0));
7200        amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7201        amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7202        amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7203}
7204