linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23#include <linux/kernel.h>
  24#include <linux/firmware.h>
  25#include <drm/drmP.h>
  26#include "amdgpu.h"
  27#include "amdgpu_gfx.h"
  28#include "vi.h"
  29#include "vi_structs.h"
  30#include "vid.h"
  31#include "amdgpu_ucode.h"
  32#include "amdgpu_atombios.h"
  33#include "atombios_i2c.h"
  34#include "clearstate_vi.h"
  35
  36#include "gmc/gmc_8_2_d.h"
  37#include "gmc/gmc_8_2_sh_mask.h"
  38
  39#include "oss/oss_3_0_d.h"
  40#include "oss/oss_3_0_sh_mask.h"
  41
  42#include "bif/bif_5_0_d.h"
  43#include "bif/bif_5_0_sh_mask.h"
  44#include "gca/gfx_8_0_d.h"
  45#include "gca/gfx_8_0_enum.h"
  46#include "gca/gfx_8_0_sh_mask.h"
  47
  48#include "dce/dce_10_0_d.h"
  49#include "dce/dce_10_0_sh_mask.h"
  50
  51#include "smu/smu_7_1_3_d.h"
  52
  53#include "ivsrcid/ivsrcid_vislands30.h"
  54
   /*
    * Driver-local sizing constants.
    * NOTE(review): presumably the number of graphics rings and the size in
    * bytes of one MEC HPD buffer; the use sites are not visible in this part
    * of the file, so confirm the units there.
    */
   55#define GFX8_NUM_GFX_RINGS     1
   56#define GFX8_MEC_HPD_SIZE 4096
  57
   /*
    * Per-ASIC GB_ADDR_CONFIG values.  The same literals appear as the
    * mmGB_ADDR_CONFIG entry of the *_golden_common_all tables later in this
    * file.  Topaz and Carrizo are defined to the same value.
    */
   58#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
   59#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
   60#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
   61#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  62
   /*
    * Field-placement helpers: each macro shifts x left by the __SHIFT constant
    * of the named field.  The first five use GB_TILE_MODE0 field shifts, the
    * last four use GB_MACROTILE_MODE0 field shifts.  No mask is applied, so a
    * value wider than the field spills into the neighbouring bits; callers
    * must pass an in-range value.
    */
   63#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
   64#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
   65#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
   66#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
   67#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
   68#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
   69#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
   70#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
   71#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  72
   /*
    * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, occupying bits 0 through 5
    * in the order CPF, RLC, MGCG, CGCG, CGLS, GRBM.  The L suffix gives each
    * constant type long.
    */
   73#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
   74#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
   75#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
   76#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
   77#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
   78#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  79
   80/* BPM SERDES command codes: SET is 1, CLE is 0 (CLE presumably abbreviates "clear" - confirm at the use site) */
   81#define SET_BPM_SERDES_CMD    1
   82#define CLE_BPM_SERDES_CMD    0
  83
   84/* BPM register addresses, numbered consecutively from 0; the final enumerator BPM_REG_FGCG_MAX evaluates to 5, the count of entries before it */
   85enum {
   86        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
   87        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
   88        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
   89        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
   90        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
   91        BPM_REG_FGCG_MAX
   92};
  93
   /* NOTE(review): presumably the entry count of the RLC "format direct" register list; the consumer is not visible here - confirm */
   94#define RLC_FormatDirectRegListLength        14
  95
   /*
    * Firmware images this driver may request, declared per ASIC so that
    * module tooling can discover them.  Images listed are ce, pfp, me, mec
    * and rlc, plus mec2 on every ASIC except Stoney and Topaz.  The three
    * Polaris variants additionally list a "_2" alternative for every image
    * except rlc.
    */
   96MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
   97MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
   98MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
   99MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
  100MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
  101MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
  102
  103MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
  104MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
  105MODULE_FIRMWARE("amdgpu/stoney_me.bin");
  106MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
  107MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
  108
  109MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
  110MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
  111MODULE_FIRMWARE("amdgpu/tonga_me.bin");
  112MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
  113MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
  114MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
  115
  116MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
  117MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
  118MODULE_FIRMWARE("amdgpu/topaz_me.bin");
  119MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
  120MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
  121
  122MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
  123MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
  124MODULE_FIRMWARE("amdgpu/fiji_me.bin");
  125MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
  126MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
  127MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
  128
  129MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
  130MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
  131MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
  132MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
  133MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
  134MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
  135MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
  136MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
  137MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
  138MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
  139MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
  140
  141MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
  142MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
  143MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
  144MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
  145MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
  146MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
  147MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
  148MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
  149MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
  150MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
  151MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
  152
  153MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
  154MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
  155MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
  156MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
  157MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
  158MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
  159MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
  160MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
  161MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
  162MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
  163MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
  164
  165MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
  166MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
  167MODULE_FIRMWARE("amdgpu/vegam_me.bin");
  168MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
  169MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
  170MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 171
  /*
   * GDS register lookup table: one row per VMID, 0 through 15, each listing
   * that VMID's BASE, SIZE, GWS and OA register in that order.
   * NOTE(review): rows are presumably indexed by VMID by the consumer, which
   * is not visible in this part of the file - confirm.
   */
  172static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
  173{
  174        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
  175        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
  176        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
  177        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
  178        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
  179        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
  180        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
  181        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
  182        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
  183        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
  184        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
  185        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
  186        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
  187        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
  188        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
  189        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
  190};
 191
  /*
   * Tonga golden register settings, stored flat as three u32 values per
   * line: a register followed by two 32-bit words.
   * NOTE(review): the words are presumably an AND mask and an OR value for
   * a register-sequence programming helper; the consumer is not visible
   * here - confirm before editing any value.
   */
  192static const u32 golden_settings_tonga_a11[] =
  193{
  194        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
  195        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  196        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  197        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  198        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  199        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
  200        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  201        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
  202        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  203        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  204        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  205        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  206        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
  207        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
  208        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
  209        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  210};
 211
  /*
   * Tonga common register settings, three u32 values per line (register,
   * then two 32-bit words; presumably mask and value - confirm against the
   * consumer).  Every entry uses 0xffffffff as its second word.  The first
   * entry writes mmGRBM_GFX_INDEX ahead of the remaining registers, so the
   * entry order should be preserved.
   */
  212static const u32 tonga_golden_common_all[] =
  213{
  214        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  215        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
  216        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
  217        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  218        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  219        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  220        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  221        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
  222};
 223
  /*
   * Tonga MGCG/CGCG initialisation list, three u32 values per line
   * (register, then two 32-bit words; presumably mask and value - confirm
   * against the consumer).  Layout, in order:
   *   - mmRLC_CGTT_MGCG_OVERRIDE set to all ones;
   *   - mmGRBM_GFX_INDEX, then the CGTT_* / *_CGTT_* clock control registers;
   *   - mmGRBM_GFX_INDEX again, then the per-CU CGTS registers for CU0 to CU7
   *     (CU0 and CU4 use the TA_SQC register where the others use TA);
   *   - mmCGTS_SM_CTRL_REG, mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL and
   *     mmCP_MEM_SLP_CNTL.
   * The repeated mmGRBM_GFX_INDEX writes make the entry order significant.
   */
  224static const u32 tonga_mgcg_cgcg_init[] =
  225{
  226        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  227        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  228        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  229        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  230        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  231        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  232        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
  233        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  234        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  235        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  236        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  237        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  238        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  239        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  240        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  241        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  242        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  243        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  244        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  245        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  246        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  247        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  248        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  249        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  250        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  251        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  252        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  253        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  254        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  255        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  256        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  257        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  258        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  259        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  260        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  261        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  262        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  263        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  264        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  265        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  266        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  267        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  268        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  269        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  270        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  271        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  272        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  273        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  274        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  275        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  276        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  277        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  278        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  279        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  280        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  281        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  282        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  283        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  284        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  285        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  286        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  287        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  288        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  289        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
  290        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  291        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  292        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  293        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  294        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
  295        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  296        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  297        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  298        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  299        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  300        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  301};
 302
  /*
   * VegaM golden register settings, three u32 values per line: a register
   * followed by two 32-bit words.
   * NOTE(review): the words are presumably an AND mask and an OR value; the
   * consumer is not visible here - confirm before editing any value.
   */
  303static const u32 golden_settings_vegam_a11[] =
  304{
  305        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
  306        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
  307        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  308        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  309        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  310        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  311        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
  312        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
  313        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  314        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  315        mmSQ_CONFIG, 0x07f80000, 0x01180000,
  316        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  317        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  318        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
  319        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  320        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
  321        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  322};
 323
  /*
   * VegaM common register settings, three u32 values per line (register,
   * then two 32-bit words; presumably mask and value - confirm against the
   * consumer).  mmGRBM_GFX_INDEX is written first, so keep the entry order.
   * Unlike the Tonga, Polaris10 and Fiji common tables, this one has no
   * PA_SC_RASTER_CONFIG entries.
   */
  324static const u32 vegam_golden_common_all[] =
  325{
  326        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  327        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  328        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  329        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  330        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  331        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  332};
 333
  /*
   * Polaris11 golden register settings, three u32 values per line: a
   * register followed by two 32-bit words.
   * NOTE(review): the words are presumably an AND mask and an OR value; the
   * consumer is not visible here - confirm before editing any value.
   */
  334static const u32 golden_settings_polaris11_a11[] =
  335{
  336        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
  337        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
  338        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  339        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  340        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  341        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  342        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
  343        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
  344        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  345        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  346        mmSQ_CONFIG, 0x07f80000, 0x01180000,
  347        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  348        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  349        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
  350        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  351        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
  352        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  353};
 354
  /*
   * Polaris11 common register settings, three u32 values per line
   * (register, then two 32-bit words; presumably mask and value - confirm
   * against the consumer).  mmGRBM_GFX_INDEX is written first, so keep the
   * entry order.  The mmGB_ADDR_CONFIG value equals
   * POLARIS11_GB_ADDR_CONFIG_GOLDEN.
   */
  355static const u32 polaris11_golden_common_all[] =
  356{
  357        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  358        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
  359        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  360        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  361        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  362        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  363};
 364
  /*
   * Polaris10 golden register settings, three u32 values per line: a
   * register followed by two 32-bit words (presumably AND mask and OR
   * value - confirm against the consumer).  Compared with the Polaris11 and
   * VegaM tables this one adds mmATC_MISC_CG and has no mmTCP_CHAN_STEER_LO
   * entry.
   */
  365static const u32 golden_settings_polaris10_a11[] =
  366{
  367        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
  368        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
  369        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
  370        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  371        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  372        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  373        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  374        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
  375        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
  376        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  377        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  378        mmSQ_CONFIG, 0x07f80000, 0x07180000,
  379        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  380        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  381        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
  382        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  383        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  384};
 385
  /*
   * Polaris10 common register settings, three u32 values per line
   * (register, then two 32-bit words; presumably mask and value - confirm
   * against the consumer).  mmGRBM_GFX_INDEX is written first, so keep the
   * entry order.
   */
  386static const u32 polaris10_golden_common_all[] =
  387{
  388        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  389        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
  390        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
  391        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  392        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  393        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  394        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  395        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  396};
 397
  /*
   * Fiji common register settings, three u32 values per line (register,
   * then two 32-bit words; presumably mask and value - confirm against the
   * consumer).  mmGRBM_GFX_INDEX is written at the start and again just
   * before mmSPI_CONFIG_CNTL_1, so the entry order is significant.
   */
  398static const u32 fiji_golden_common_all[] =
  399{
  400        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  401        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
  402        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
  403        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  404        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  405        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  406        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  407        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  408        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  409        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
  410};
 411
  /*
   * Fiji golden register settings, three u32 values per line: a register
   * followed by two 32-bit words.
   * NOTE(review): the words are presumably an AND mask and an OR value; the
   * consumer is not visible here - confirm before editing any value.
   */
  412static const u32 golden_settings_fiji_a10[] =
  413{
  414        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  415        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  416        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  417        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  418        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  419        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  420        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  421        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  422        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  423        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
  424        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  425};
 426
  /*
   * Fiji MGCG/CGCG initialisation list, three u32 values per line
   * (register, then two 32-bit words; presumably mask and value - confirm
   * against the consumer).  It starts with mmRLC_CGTT_MGCG_OVERRIDE set to
   * all ones, then mmGRBM_GFX_INDEX and the CGTT_* / *_CGTT_* clock control
   * registers, then mmGRBM_GFX_INDEX again followed by four closing
   * registers.  Unlike the Tonga and Iceland lists it contains no per-CU
   * CGTS_CUn entries.  Entry order is significant.
   */
  427static const u32 fiji_mgcg_cgcg_init[] =
  428{
  429        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  430        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  431        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  432        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  433        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  434        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  435        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
  436        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  437        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  438        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  439        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  440        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  441        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  442        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  443        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  444        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  445        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  446        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  447        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  448        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  449        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  450        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  451        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  452        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  453        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  454        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  455        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  456        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  457        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  458        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  459        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  460        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  461        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  462        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  463        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  464};
 465
  /*
   * Iceland golden register settings, three u32 values per line: a register
   * followed by two 32-bit words.
   * NOTE(review): the words are presumably an AND mask and an OR value; the
   * consumer is not visible here - confirm before editing any value.
   */
  466static const u32 golden_settings_iceland_a11[] =
  467{
  468        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  469        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  470        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
  471        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  472        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  473        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  474        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
  475        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
  476        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
  477        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  478        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  479        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  480        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  481        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
  482        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  483        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
  484};
 485
  /*
   * Iceland common register settings, three u32 values per line (register,
   * then two 32-bit words; presumably mask and value - confirm against the
   * consumer).  mmGRBM_GFX_INDEX is written first, so keep the entry order.
   * The mmGB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN.
   */
  486static const u32 iceland_golden_common_all[] =
  487{
  488        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  489        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
  490        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
  491        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
  492        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  493        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  494        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  495        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
  496};
 497
  /*
   * Iceland MGCG/CGCG initialisation list, three u32 values per line
   * (register, then two 32-bit words; presumably mask and value - confirm
   * against the consumer).  Layout, in order:
   *   - mmRLC_CGTT_MGCG_OVERRIDE set to all ones;
   *   - mmGRBM_GFX_INDEX, then the CGTT_* / *_CGTT_* clock control registers;
   *   - mmGRBM_GFX_INDEX again, then the per-CU CGTS registers for CU0 to CU5
   *     only;
   *   - mmCGTS_SM_CTRL_REG, mmCP_RB_WPTR_POLL_CNTL and mmRLC_CGCG_CGLS_CTRL.
   * Unlike the Tonga and Fiji lists there is no trailing mmCP_MEM_SLP_CNTL
   * entry.  The repeated mmGRBM_GFX_INDEX writes make the order significant.
   */
  498static const u32 iceland_mgcg_cgcg_init[] =
  499{
  500        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  501        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  502        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  503        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  504        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
  505        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
  506        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
  507        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  508        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  509        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  510        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  511        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  512        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  513        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  514        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  515        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  516        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  517        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  518        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  519        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  520        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  521        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  522        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
  523        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  524        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  525        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  526        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  527        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  528        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  529        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  530        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  531        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  532        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  533        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
  534        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  535        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  536        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  537        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  538        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  539        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  540        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  541        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  542        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  543        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  544        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  545        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  546        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  547        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  548        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  549        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  550        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  551        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  552        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  553        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
  554        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  555        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  556        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  557        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  558        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  559        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  560        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  561        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  562        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  563        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  564};
 565
  /*
   * Carrizo (cz) golden register settings, three u32 values per line: a
   * register followed by two 32-bit words (presumably AND mask and OR
   * value - confirm against the consumer).
   * NOTE(review): the mmTCP_ADDR_CONFIG entry uses 0x0000000f as its second
   * word while every other golden-settings table in this file uses
   * 0x000003ff for that register, and its third word (0x000000f3) has bits
   * outside 0x0000000f.  Confirm this is intended for this ASIC.
   */
  566static const u32 cz_golden_settings_a11[] =
  567{
  568        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  569        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  570        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  571        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
  572        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  573        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
  574        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  575        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
  576        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  577        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  578        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
  579        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
  580};
 581
  /*
   * Carrizo (cz) common register settings, three u32 values per line
   * (register, then two 32-bit words; presumably mask and value - confirm
   * against the consumer).  mmGRBM_GFX_INDEX is written first, so keep the
   * entry order.  The mmGB_ADDR_CONFIG value equals
   * CARRIZO_GB_ADDR_CONFIG_GOLDEN.
   */
  582static const u32 cz_golden_common_all[] =
  583{
  584        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  585        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
  586        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
  587        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
  588        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  589        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  590        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  591        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
  592};
 593
 594static const u32 cz_mgcg_cgcg_init[] =
 595{
 596        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 597        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 598        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 599        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 600        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 601        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 602        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 603        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 604        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 605        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 606        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 607        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 608        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 609        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 610        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 611        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 612        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 613        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 614        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 615        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 616        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 617        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 618        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 619        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 620        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 621        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 622        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 623        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 624        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 625        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 626        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 627        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 628        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 629        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 630        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 631        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 632        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 633        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 634        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 635        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 636        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 637        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 638        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 639        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 640        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 641        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 642        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 643        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 644        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 645        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 646        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 647        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 648        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 649        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 650        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 651        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 652        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 653        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 654        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 655        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 656        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 657        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 658        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 659        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 660        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 661        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 662        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 663        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 664        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 665        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 666        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 667        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 668        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 669        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 670        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 671};
 672
 673static const u32 stoney_golden_settings_a11[] =
 674{
 675        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 676        mmGB_GPU_ID, 0x0000000f, 0x00000000,
 677        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 678        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 679        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 680        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 681        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 682        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 683        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 684        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 685};
 686
 687static const u32 stoney_golden_common_all[] =
 688{
 689        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 690        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 691        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 692        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 693        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 694        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 695        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 696        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 697};
 698
 699static const u32 stoney_mgcg_cgcg_init[] =
 700{
 701        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 702        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 703        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 704        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 705        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 706};
 707
 708
 709static const char * const sq_edc_source_names[] = {
 710        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
 711        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
 712        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
 713        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
 714        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
 715        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
 716        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
 717};
 718
 719static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 720static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 721static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 722static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 723static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 724static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 725static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 726static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 727
 728static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 729{
 730        switch (adev->asic_type) {
 731        case CHIP_TOPAZ:
 732                amdgpu_device_program_register_sequence(adev,
 733                                                        iceland_mgcg_cgcg_init,
 734                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
 735                amdgpu_device_program_register_sequence(adev,
 736                                                        golden_settings_iceland_a11,
 737                                                        ARRAY_SIZE(golden_settings_iceland_a11));
 738                amdgpu_device_program_register_sequence(adev,
 739                                                        iceland_golden_common_all,
 740                                                        ARRAY_SIZE(iceland_golden_common_all));
 741                break;
 742        case CHIP_FIJI:
 743                amdgpu_device_program_register_sequence(adev,
 744                                                        fiji_mgcg_cgcg_init,
 745                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
 746                amdgpu_device_program_register_sequence(adev,
 747                                                        golden_settings_fiji_a10,
 748                                                        ARRAY_SIZE(golden_settings_fiji_a10));
 749                amdgpu_device_program_register_sequence(adev,
 750                                                        fiji_golden_common_all,
 751                                                        ARRAY_SIZE(fiji_golden_common_all));
 752                break;
 753
 754        case CHIP_TONGA:
 755                amdgpu_device_program_register_sequence(adev,
 756                                                        tonga_mgcg_cgcg_init,
 757                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
 758                amdgpu_device_program_register_sequence(adev,
 759                                                        golden_settings_tonga_a11,
 760                                                        ARRAY_SIZE(golden_settings_tonga_a11));
 761                amdgpu_device_program_register_sequence(adev,
 762                                                        tonga_golden_common_all,
 763                                                        ARRAY_SIZE(tonga_golden_common_all));
 764                break;
 765        case CHIP_VEGAM:
 766                amdgpu_device_program_register_sequence(adev,
 767                                                        golden_settings_vegam_a11,
 768                                                        ARRAY_SIZE(golden_settings_vegam_a11));
 769                amdgpu_device_program_register_sequence(adev,
 770                                                        vegam_golden_common_all,
 771                                                        ARRAY_SIZE(vegam_golden_common_all));
 772                break;
 773        case CHIP_POLARIS11:
 774        case CHIP_POLARIS12:
 775                amdgpu_device_program_register_sequence(adev,
 776                                                        golden_settings_polaris11_a11,
 777                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
 778                amdgpu_device_program_register_sequence(adev,
 779                                                        polaris11_golden_common_all,
 780                                                        ARRAY_SIZE(polaris11_golden_common_all));
 781                break;
 782        case CHIP_POLARIS10:
 783                amdgpu_device_program_register_sequence(adev,
 784                                                        golden_settings_polaris10_a11,
 785                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
 786                amdgpu_device_program_register_sequence(adev,
 787                                                        polaris10_golden_common_all,
 788                                                        ARRAY_SIZE(polaris10_golden_common_all));
 789                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 790                if (adev->pdev->revision == 0xc7 &&
 791                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 792                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 793                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
 794                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 795                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 796                }
 797                break;
 798        case CHIP_CARRIZO:
 799                amdgpu_device_program_register_sequence(adev,
 800                                                        cz_mgcg_cgcg_init,
 801                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
 802                amdgpu_device_program_register_sequence(adev,
 803                                                        cz_golden_settings_a11,
 804                                                        ARRAY_SIZE(cz_golden_settings_a11));
 805                amdgpu_device_program_register_sequence(adev,
 806                                                        cz_golden_common_all,
 807                                                        ARRAY_SIZE(cz_golden_common_all));
 808                break;
 809        case CHIP_STONEY:
 810                amdgpu_device_program_register_sequence(adev,
 811                                                        stoney_mgcg_cgcg_init,
 812                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
 813                amdgpu_device_program_register_sequence(adev,
 814                                                        stoney_golden_settings_a11,
 815                                                        ARRAY_SIZE(stoney_golden_settings_a11));
 816                amdgpu_device_program_register_sequence(adev,
 817                                                        stoney_golden_common_all,
 818                                                        ARRAY_SIZE(stoney_golden_common_all));
 819                break;
 820        default:
 821                break;
 822        }
 823}
 824
 825static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 826{
 827        adev->gfx.scratch.num_reg = 8;
 828        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 829        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 830}
 831
 832static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 833{
 834        struct amdgpu_device *adev = ring->adev;
 835        uint32_t scratch;
 836        uint32_t tmp = 0;
 837        unsigned i;
 838        int r;
 839
 840        r = amdgpu_gfx_scratch_get(adev, &scratch);
 841        if (r)
 842                return r;
 843
 844        WREG32(scratch, 0xCAFEDEAD);
 845        r = amdgpu_ring_alloc(ring, 3);
 846        if (r)
 847                goto error_free_scratch;
 848
 849        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 850        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 851        amdgpu_ring_write(ring, 0xDEADBEEF);
 852        amdgpu_ring_commit(ring);
 853
 854        for (i = 0; i < adev->usec_timeout; i++) {
 855                tmp = RREG32(scratch);
 856                if (tmp == 0xDEADBEEF)
 857                        break;
 858                DRM_UDELAY(1);
 859        }
 860
 861        if (i >= adev->usec_timeout)
 862                r = -ETIMEDOUT;
 863
 864error_free_scratch:
 865        amdgpu_gfx_scratch_free(adev, scratch);
 866        return r;
 867}
 868
 869static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 870{
 871        struct amdgpu_device *adev = ring->adev;
 872        struct amdgpu_ib ib;
 873        struct dma_fence *f = NULL;
 874
 875        unsigned int index;
 876        uint64_t gpu_addr;
 877        uint32_t tmp;
 878        long r;
 879
 880        r = amdgpu_device_wb_get(adev, &index);
 881        if (r)
 882                return r;
 883
 884        gpu_addr = adev->wb.gpu_addr + (index * 4);
 885        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 886        memset(&ib, 0, sizeof(ib));
 887        r = amdgpu_ib_get(adev, NULL, 16, &ib);
 888        if (r)
 889                goto err1;
 890
 891        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 892        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 893        ib.ptr[2] = lower_32_bits(gpu_addr);
 894        ib.ptr[3] = upper_32_bits(gpu_addr);
 895        ib.ptr[4] = 0xDEADBEEF;
 896        ib.length_dw = 5;
 897
 898        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 899        if (r)
 900                goto err2;
 901
 902        r = dma_fence_wait_timeout(f, false, timeout);
 903        if (r == 0) {
 904                r = -ETIMEDOUT;
 905                goto err2;
 906        } else if (r < 0) {
 907                goto err2;
 908        }
 909
 910        tmp = adev->wb.wb[index];
 911        if (tmp == 0xDEADBEEF)
 912                r = 0;
 913        else
 914                r = -EINVAL;
 915
 916err2:
 917        amdgpu_ib_free(adev, &ib, NULL);
 918        dma_fence_put(f);
 919err1:
 920        amdgpu_device_wb_free(adev, index);
 921        return r;
 922}
 923
 924
 925static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 926{
 927        release_firmware(adev->gfx.pfp_fw);
 928        adev->gfx.pfp_fw = NULL;
 929        release_firmware(adev->gfx.me_fw);
 930        adev->gfx.me_fw = NULL;
 931        release_firmware(adev->gfx.ce_fw);
 932        adev->gfx.ce_fw = NULL;
 933        release_firmware(adev->gfx.rlc_fw);
 934        adev->gfx.rlc_fw = NULL;
 935        release_firmware(adev->gfx.mec_fw);
 936        adev->gfx.mec_fw = NULL;
 937        if ((adev->asic_type != CHIP_STONEY) &&
 938            (adev->asic_type != CHIP_TOPAZ))
 939                release_firmware(adev->gfx.mec2_fw);
 940        adev->gfx.mec2_fw = NULL;
 941
 942        kfree(adev->gfx.rlc.register_list_format);
 943}
 944
 945static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 946{
 947        const char *chip_name;
 948        char fw_name[30];
 949        int err;
 950        struct amdgpu_firmware_info *info = NULL;
 951        const struct common_firmware_header *header = NULL;
 952        const struct gfx_firmware_header_v1_0 *cp_hdr;
 953        const struct rlc_firmware_header_v2_0 *rlc_hdr;
 954        unsigned int *tmp = NULL, i;
 955
 956        DRM_DEBUG("\n");
 957
 958        switch (adev->asic_type) {
 959        case CHIP_TOPAZ:
 960                chip_name = "topaz";
 961                break;
 962        case CHIP_TONGA:
 963                chip_name = "tonga";
 964                break;
 965        case CHIP_CARRIZO:
 966                chip_name = "carrizo";
 967                break;
 968        case CHIP_FIJI:
 969                chip_name = "fiji";
 970                break;
 971        case CHIP_STONEY:
 972                chip_name = "stoney";
 973                break;
 974        case CHIP_POLARIS10:
 975                chip_name = "polaris10";
 976                break;
 977        case CHIP_POLARIS11:
 978                chip_name = "polaris11";
 979                break;
 980        case CHIP_POLARIS12:
 981                chip_name = "polaris12";
 982                break;
 983        case CHIP_VEGAM:
 984                chip_name = "vegam";
 985                break;
 986        default:
 987                BUG();
 988        }
 989
 990        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 991                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
 992                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 993                if (err == -ENOENT) {
 994                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 995                        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 996                }
 997        } else {
 998                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 999                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000        }
1001        if (err)
1002                goto out;
1003        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1004        if (err)
1005                goto out;
1006        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1007        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1008        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1009
1010        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1011                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1012                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1013                if (err == -ENOENT) {
1014                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015                        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016                }
1017        } else {
1018                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020        }
1021        if (err)
1022                goto out;
1023        err = amdgpu_ucode_validate(adev->gfx.me_fw);
1024        if (err)
1025                goto out;
1026        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1027        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1028
1029        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1030
1031        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1032                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1033                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1034                if (err == -ENOENT) {
1035                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036                        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037                }
1038        } else {
1039                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041        }
1042        if (err)
1043                goto out;
1044        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1045        if (err)
1046                goto out;
1047        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1048        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1049        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1050
1051        /*
1052         * Support for MCBP/Virtualization in combination with chained IBs is
1053         * formal released on feature version #46
1054         */
1055        if (adev->gfx.ce_feature_version >= 46 &&
1056            adev->gfx.pfp_feature_version >= 46) {
1057                adev->virt.chained_ib_support = true;
1058                DRM_INFO("Chained IB support enabled!\n");
1059        } else
1060                adev->virt.chained_ib_support = false;
1061
1062        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1063        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1064        if (err)
1065                goto out;
1066        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1067        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1068        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1069        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1070
1071        adev->gfx.rlc.save_and_restore_offset =
1072                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
1073        adev->gfx.rlc.clear_state_descriptor_offset =
1074                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1075        adev->gfx.rlc.avail_scratch_ram_locations =
1076                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1077        adev->gfx.rlc.reg_restore_list_size =
1078                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
1079        adev->gfx.rlc.reg_list_format_start =
1080                        le32_to_cpu(rlc_hdr->reg_list_format_start);
1081        adev->gfx.rlc.reg_list_format_separate_start =
1082                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1083        adev->gfx.rlc.starting_offsets_start =
1084                        le32_to_cpu(rlc_hdr->starting_offsets_start);
1085        adev->gfx.rlc.reg_list_format_size_bytes =
1086                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1087        adev->gfx.rlc.reg_list_size_bytes =
1088                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1089
1090        adev->gfx.rlc.register_list_format =
1091                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1092                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1093
1094        if (!adev->gfx.rlc.register_list_format) {
1095                err = -ENOMEM;
1096                goto out;
1097        }
1098
1099        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1100                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1101        for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1102                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1103
1104        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1105
1106        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1107                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1108        for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1109                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1110
1111        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1112                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1113                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1114                if (err == -ENOENT) {
1115                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116                        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117                }
1118        } else {
1119                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121        }
1122        if (err)
1123                goto out;
1124        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1125        if (err)
1126                goto out;
1127        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1128        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1129        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1130
1131        if ((adev->asic_type != CHIP_STONEY) &&
1132            (adev->asic_type != CHIP_TOPAZ)) {
1133                if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1134                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1135                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1136                        if (err == -ENOENT) {
1137                                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138                                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                        }
1140                } else {
1141                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                }
1144                if (!err) {
1145                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1146                        if (err)
1147                                goto out;
1148                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1149                                adev->gfx.mec2_fw->data;
1150                        adev->gfx.mec2_fw_version =
1151                                le32_to_cpu(cp_hdr->header.ucode_version);
1152                        adev->gfx.mec2_feature_version =
1153                                le32_to_cpu(cp_hdr->ucode_feature_version);
1154                } else {
1155                        err = 0;
1156                        adev->gfx.mec2_fw = NULL;
1157                }
1158        }
1159
1160        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1161        info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1162        info->fw = adev->gfx.pfp_fw;
1163        header = (const struct common_firmware_header *)info->fw->data;
1164        adev->firmware.fw_size +=
1165                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1166
1167        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1168        info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1169        info->fw = adev->gfx.me_fw;
1170        header = (const struct common_firmware_header *)info->fw->data;
1171        adev->firmware.fw_size +=
1172                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1173
1174        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1175        info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1176        info->fw = adev->gfx.ce_fw;
1177        header = (const struct common_firmware_header *)info->fw->data;
1178        adev->firmware.fw_size +=
1179                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1182        info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1183        info->fw = adev->gfx.rlc_fw;
1184        header = (const struct common_firmware_header *)info->fw->data;
1185        adev->firmware.fw_size +=
1186                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1189        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1190        info->fw = adev->gfx.mec_fw;
1191        header = (const struct common_firmware_header *)info->fw->data;
1192        adev->firmware.fw_size +=
1193                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195        /* we need account JT in */
1196        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1197        adev->firmware.fw_size +=
1198                ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1199
1200        if (amdgpu_sriov_vf(adev)) {
1201                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1202                info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1203                info->fw = adev->gfx.mec_fw;
1204                adev->firmware.fw_size +=
1205                        ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1206        }
1207
1208        if (adev->gfx.mec2_fw) {
1209                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1210                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1211                info->fw = adev->gfx.mec2_fw;
1212                header = (const struct common_firmware_header *)info->fw->data;
1213                adev->firmware.fw_size +=
1214                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1215        }
1216
1217out:
1218        if (err) {
1219                dev_err(adev->dev,
1220                        "gfx8: Failed to load firmware \"%s\"\n",
1221                        fw_name);
1222                release_firmware(adev->gfx.pfp_fw);
1223                adev->gfx.pfp_fw = NULL;
1224                release_firmware(adev->gfx.me_fw);
1225                adev->gfx.me_fw = NULL;
1226                release_firmware(adev->gfx.ce_fw);
1227                adev->gfx.ce_fw = NULL;
1228                release_firmware(adev->gfx.rlc_fw);
1229                adev->gfx.rlc_fw = NULL;
1230                release_firmware(adev->gfx.mec_fw);
1231                adev->gfx.mec_fw = NULL;
1232                release_firmware(adev->gfx.mec2_fw);
1233                adev->gfx.mec2_fw = NULL;
1234        }
1235        return err;
1236}
1237
1238static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1239                                    volatile u32 *buffer)
1240{
1241        u32 count = 0, i;
1242        const struct cs_section_def *sect = NULL;
1243        const struct cs_extent_def *ext = NULL;
1244
1245        if (adev->gfx.rlc.cs_data == NULL)
1246                return;
1247        if (buffer == NULL)
1248                return;
1249
1250        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1252
1253        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1254        buffer[count++] = cpu_to_le32(0x80000000);
1255        buffer[count++] = cpu_to_le32(0x80000000);
1256
1257        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1258                for (ext = sect->section; ext->extent != NULL; ++ext) {
1259                        if (sect->id == SECT_CONTEXT) {
1260                                buffer[count++] =
1261                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1262                                buffer[count++] = cpu_to_le32(ext->reg_index -
1263                                                PACKET3_SET_CONTEXT_REG_START);
1264                                for (i = 0; i < ext->reg_count; i++)
1265                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
1266                        } else {
1267                                return;
1268                        }
1269                }
1270        }
1271
1272        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1273        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1274                        PACKET3_SET_CONTEXT_REG_START);
1275        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1276        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1277
1278        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1279        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1280
1281        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1282        buffer[count++] = cpu_to_le32(0);
1283}
1284
1285static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1286{
1287        if (adev->asic_type == CHIP_CARRIZO)
1288                return 5;
1289        else
1290                return 4;
1291}
1292
1293static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1294{
1295        const struct cs_section_def *cs_data;
1296        int r;
1297
1298        adev->gfx.rlc.cs_data = vi_cs_data;
1299
1300        cs_data = adev->gfx.rlc.cs_data;
1301
1302        if (cs_data) {
1303                /* init clear state block */
1304                r = amdgpu_gfx_rlc_init_csb(adev);
1305                if (r)
1306                        return r;
1307        }
1308
1309        if ((adev->asic_type == CHIP_CARRIZO) ||
1310            (adev->asic_type == CHIP_STONEY)) {
1311                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1312                r = amdgpu_gfx_rlc_init_cpt(adev);
1313                if (r)
1314                        return r;
1315        }
1316
1317        return 0;
1318}
1319
1320static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1321{
1322        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1323}
1324
1325static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1326{
1327        int r;
1328        u32 *hpd;
1329        size_t mec_hpd_size;
1330
1331        bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1332
1333        /* take ownership of the relevant compute queues */
1334        amdgpu_gfx_compute_queue_acquire(adev);
1335
1336        mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1337
1338        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1339                                      AMDGPU_GEM_DOMAIN_VRAM,
1340                                      &adev->gfx.mec.hpd_eop_obj,
1341                                      &adev->gfx.mec.hpd_eop_gpu_addr,
1342                                      (void **)&hpd);
1343        if (r) {
1344                dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1345                return r;
1346        }
1347
1348        memset(hpd, 0, mec_hpd_size);
1349
1350        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1351        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1352
1353        return 0;
1354}
1355
1356static const u32 vgpr_init_compute_shader[] =
1357{
1358        0x7e000209, 0x7e020208,
1359        0x7e040207, 0x7e060206,
1360        0x7e080205, 0x7e0a0204,
1361        0x7e0c0203, 0x7e0e0202,
1362        0x7e100201, 0x7e120200,
1363        0x7e140209, 0x7e160208,
1364        0x7e180207, 0x7e1a0206,
1365        0x7e1c0205, 0x7e1e0204,
1366        0x7e200203, 0x7e220202,
1367        0x7e240201, 0x7e260200,
1368        0x7e280209, 0x7e2a0208,
1369        0x7e2c0207, 0x7e2e0206,
1370        0x7e300205, 0x7e320204,
1371        0x7e340203, 0x7e360202,
1372        0x7e380201, 0x7e3a0200,
1373        0x7e3c0209, 0x7e3e0208,
1374        0x7e400207, 0x7e420206,
1375        0x7e440205, 0x7e460204,
1376        0x7e480203, 0x7e4a0202,
1377        0x7e4c0201, 0x7e4e0200,
1378        0x7e500209, 0x7e520208,
1379        0x7e540207, 0x7e560206,
1380        0x7e580205, 0x7e5a0204,
1381        0x7e5c0203, 0x7e5e0202,
1382        0x7e600201, 0x7e620200,
1383        0x7e640209, 0x7e660208,
1384        0x7e680207, 0x7e6a0206,
1385        0x7e6c0205, 0x7e6e0204,
1386        0x7e700203, 0x7e720202,
1387        0x7e740201, 0x7e760200,
1388        0x7e780209, 0x7e7a0208,
1389        0x7e7c0207, 0x7e7e0206,
1390        0xbf8a0000, 0xbf810000,
1391};
1392
1393static const u32 sgpr_init_compute_shader[] =
1394{
1395        0xbe8a0100, 0xbe8c0102,
1396        0xbe8e0104, 0xbe900106,
1397        0xbe920108, 0xbe940100,
1398        0xbe960102, 0xbe980104,
1399        0xbe9a0106, 0xbe9c0108,
1400        0xbe9e0100, 0xbea00102,
1401        0xbea20104, 0xbea40106,
1402        0xbea60108, 0xbea80100,
1403        0xbeaa0102, 0xbeac0104,
1404        0xbeae0106, 0xbeb00108,
1405        0xbeb20100, 0xbeb40102,
1406        0xbeb60104, 0xbeb80106,
1407        0xbeba0108, 0xbebc0100,
1408        0xbebe0102, 0xbec00104,
1409        0xbec20106, 0xbec40108,
1410        0xbec60100, 0xbec80102,
1411        0xbee60004, 0xbee70005,
1412        0xbeea0006, 0xbeeb0007,
1413        0xbee80008, 0xbee90009,
1414        0xbefc0000, 0xbf8a0000,
1415        0xbf810000, 0x00000000,
1416};
1417
1418static const u32 vgpr_init_regs[] =
1419{
1420        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1421        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1422        mmCOMPUTE_NUM_THREAD_X, 256*4,
1423        mmCOMPUTE_NUM_THREAD_Y, 1,
1424        mmCOMPUTE_NUM_THREAD_Z, 1,
1425        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1426        mmCOMPUTE_PGM_RSRC2, 20,
1427        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1428        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1429        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1430        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1431        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1432        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1433        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1434        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1435        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1436        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1437};
1438
1439static const u32 sgpr1_init_regs[] =
1440{
1441        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1442        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1443        mmCOMPUTE_NUM_THREAD_X, 256*5,
1444        mmCOMPUTE_NUM_THREAD_Y, 1,
1445        mmCOMPUTE_NUM_THREAD_Z, 1,
1446        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1447        mmCOMPUTE_PGM_RSRC2, 20,
1448        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1449        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1450        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1451        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1452        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1453        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1454        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1455        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1456        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1457        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1458};
1459
1460static const u32 sgpr2_init_regs[] =
1461{
1462        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1463        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1464        mmCOMPUTE_NUM_THREAD_X, 256*5,
1465        mmCOMPUTE_NUM_THREAD_Y, 1,
1466        mmCOMPUTE_NUM_THREAD_Z, 1,
1467        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1468        mmCOMPUTE_PGM_RSRC2, 20,
1469        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1470        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1471        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1472        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1473        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1474        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1475        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1476        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1477        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1478        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1479};
1480
1481static const u32 sec_ded_counter_registers[] =
1482{
1483        mmCPC_EDC_ATC_CNT,
1484        mmCPC_EDC_SCRATCH_CNT,
1485        mmCPC_EDC_UCODE_CNT,
1486        mmCPF_EDC_ATC_CNT,
1487        mmCPF_EDC_ROQ_CNT,
1488        mmCPF_EDC_TAG_CNT,
1489        mmCPG_EDC_ATC_CNT,
1490        mmCPG_EDC_DMA_CNT,
1491        mmCPG_EDC_TAG_CNT,
1492        mmDC_EDC_CSINVOC_CNT,
1493        mmDC_EDC_RESTORE_CNT,
1494        mmDC_EDC_STATE_CNT,
1495        mmGDS_EDC_CNT,
1496        mmGDS_EDC_GRBM_CNT,
1497        mmGDS_EDC_OA_DED,
1498        mmSPI_EDC_CNT,
1499        mmSQC_ATC_EDC_GATCL1_CNT,
1500        mmSQC_EDC_CNT,
1501        mmSQ_EDC_DED_CNT,
1502        mmSQ_EDC_INFO,
1503        mmSQ_EDC_SEC_CNT,
1504        mmTCC_EDC_CNT,
1505        mmTCP_ATC_EDC_GATCL1_CNT,
1506        mmTCP_EDC_CNT,
1507        mmTD_EDC_CNT
1508};
1509
1510static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1511{
1512        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1513        struct amdgpu_ib ib;
1514        struct dma_fence *f = NULL;
1515        int r, i;
1516        u32 tmp;
1517        unsigned total_size, vgpr_offset, sgpr_offset;
1518        u64 gpu_addr;
1519
1520        /* only supported on CZ */
1521        if (adev->asic_type != CHIP_CARRIZO)
1522                return 0;
1523
1524        /* bail if the compute ring is not ready */
1525        if (!ring->sched.ready)
1526                return 0;
1527
1528        tmp = RREG32(mmGB_EDC_MODE);
1529        WREG32(mmGB_EDC_MODE, 0);
1530
1531        total_size =
1532                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1533        total_size +=
1534                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1535        total_size +=
1536                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1537        total_size = ALIGN(total_size, 256);
1538        vgpr_offset = total_size;
1539        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1540        sgpr_offset = total_size;
1541        total_size += sizeof(sgpr_init_compute_shader);
1542
1543        /* allocate an indirect buffer to put the commands in */
1544        memset(&ib, 0, sizeof(ib));
1545        r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1546        if (r) {
1547                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1548                return r;
1549        }
1550
1551        /* load the compute shaders */
1552        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1553                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1554
1555        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1556                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1557
1558        /* init the ib length to 0 */
1559        ib.length_dw = 0;
1560
1561        /* VGPR */
1562        /* write the register state for the compute dispatch */
1563        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1564                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1565                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1566                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1567        }
1568        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1569        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1570        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1571        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1572        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1573        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1574
1575        /* write dispatch packet */
1576        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1577        ib.ptr[ib.length_dw++] = 8; /* x */
1578        ib.ptr[ib.length_dw++] = 1; /* y */
1579        ib.ptr[ib.length_dw++] = 1; /* z */
1580        ib.ptr[ib.length_dw++] =
1581                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1582
1583        /* write CS partial flush packet */
1584        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1585        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1586
1587        /* SGPR1 */
1588        /* write the register state for the compute dispatch */
1589        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1590                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1591                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1592                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1593        }
1594        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1595        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1596        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1597        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1598        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1599        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1600
1601        /* write dispatch packet */
1602        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1603        ib.ptr[ib.length_dw++] = 8; /* x */
1604        ib.ptr[ib.length_dw++] = 1; /* y */
1605        ib.ptr[ib.length_dw++] = 1; /* z */
1606        ib.ptr[ib.length_dw++] =
1607                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1608
1609        /* write CS partial flush packet */
1610        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1611        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1612
1613        /* SGPR2 */
1614        /* write the register state for the compute dispatch */
1615        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1616                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1617                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1618                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1619        }
1620        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1621        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1622        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1623        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1624        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1625        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1626
1627        /* write dispatch packet */
1628        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1629        ib.ptr[ib.length_dw++] = 8; /* x */
1630        ib.ptr[ib.length_dw++] = 1; /* y */
1631        ib.ptr[ib.length_dw++] = 1; /* z */
1632        ib.ptr[ib.length_dw++] =
1633                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1634
1635        /* write CS partial flush packet */
1636        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1637        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1638
1639        /* shedule the ib on the ring */
1640        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1641        if (r) {
1642                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1643                goto fail;
1644        }
1645
1646        /* wait for the GPU to finish processing the IB */
1647        r = dma_fence_wait(f, false);
1648        if (r) {
1649                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1650                goto fail;
1651        }
1652
1653        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1654        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1655        WREG32(mmGB_EDC_MODE, tmp);
1656
1657        tmp = RREG32(mmCC_GC_EDC_CONFIG);
1658        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1659        WREG32(mmCC_GC_EDC_CONFIG, tmp);
1660
1661
1662        /* read back registers to clear the counters */
1663        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1664                RREG32(sec_ded_counter_registers[i]);
1665
1666fail:
1667        amdgpu_ib_free(adev, &ib, NULL);
1668        dma_fence_put(f);
1669
1670        return r;
1671}
1672
1673static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1674{
1675        u32 gb_addr_config;
1676        u32 mc_shared_chmap, mc_arb_ramcfg;
1677        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1678        u32 tmp;
1679        int ret;
1680
1681        switch (adev->asic_type) {
1682        case CHIP_TOPAZ:
1683                adev->gfx.config.max_shader_engines = 1;
1684                adev->gfx.config.max_tile_pipes = 2;
1685                adev->gfx.config.max_cu_per_sh = 6;
1686                adev->gfx.config.max_sh_per_se = 1;
1687                adev->gfx.config.max_backends_per_se = 2;
1688                adev->gfx.config.max_texture_channel_caches = 2;
1689                adev->gfx.config.max_gprs = 256;
1690                adev->gfx.config.max_gs_threads = 32;
1691                adev->gfx.config.max_hw_contexts = 8;
1692
1693                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1694                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1695                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1696                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1697                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1698                break;
1699        case CHIP_FIJI:
1700                adev->gfx.config.max_shader_engines = 4;
1701                adev->gfx.config.max_tile_pipes = 16;
1702                adev->gfx.config.max_cu_per_sh = 16;
1703                adev->gfx.config.max_sh_per_se = 1;
1704                adev->gfx.config.max_backends_per_se = 4;
1705                adev->gfx.config.max_texture_channel_caches = 16;
1706                adev->gfx.config.max_gprs = 256;
1707                adev->gfx.config.max_gs_threads = 32;
1708                adev->gfx.config.max_hw_contexts = 8;
1709
1710                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1711                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1712                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1713                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1714                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1715                break;
1716        case CHIP_POLARIS11:
1717        case CHIP_POLARIS12:
1718                ret = amdgpu_atombios_get_gfx_info(adev);
1719                if (ret)
1720                        return ret;
1721                adev->gfx.config.max_gprs = 256;
1722                adev->gfx.config.max_gs_threads = 32;
1723                adev->gfx.config.max_hw_contexts = 8;
1724
1725                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1726                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1727                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1728                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1729                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1730                break;
1731        case CHIP_POLARIS10:
1732        case CHIP_VEGAM:
1733                ret = amdgpu_atombios_get_gfx_info(adev);
1734                if (ret)
1735                        return ret;
1736                adev->gfx.config.max_gprs = 256;
1737                adev->gfx.config.max_gs_threads = 32;
1738                adev->gfx.config.max_hw_contexts = 8;
1739
1740                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1741                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1742                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1743                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1744                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1745                break;
1746        case CHIP_TONGA:
1747                adev->gfx.config.max_shader_engines = 4;
1748                adev->gfx.config.max_tile_pipes = 8;
1749                adev->gfx.config.max_cu_per_sh = 8;
1750                adev->gfx.config.max_sh_per_se = 1;
1751                adev->gfx.config.max_backends_per_se = 2;
1752                adev->gfx.config.max_texture_channel_caches = 8;
1753                adev->gfx.config.max_gprs = 256;
1754                adev->gfx.config.max_gs_threads = 32;
1755                adev->gfx.config.max_hw_contexts = 8;
1756
1757                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1758                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1759                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1760                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1761                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1762                break;
1763        case CHIP_CARRIZO:
1764                adev->gfx.config.max_shader_engines = 1;
1765                adev->gfx.config.max_tile_pipes = 2;
1766                adev->gfx.config.max_sh_per_se = 1;
1767                adev->gfx.config.max_backends_per_se = 2;
1768                adev->gfx.config.max_cu_per_sh = 8;
1769                adev->gfx.config.max_texture_channel_caches = 2;
1770                adev->gfx.config.max_gprs = 256;
1771                adev->gfx.config.max_gs_threads = 32;
1772                adev->gfx.config.max_hw_contexts = 8;
1773
1774                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1775                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1776                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1777                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1778                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1779                break;
1780        case CHIP_STONEY:
1781                adev->gfx.config.max_shader_engines = 1;
1782                adev->gfx.config.max_tile_pipes = 2;
1783                adev->gfx.config.max_sh_per_se = 1;
1784                adev->gfx.config.max_backends_per_se = 1;
1785                adev->gfx.config.max_cu_per_sh = 3;
1786                adev->gfx.config.max_texture_channel_caches = 2;
1787                adev->gfx.config.max_gprs = 256;
1788                adev->gfx.config.max_gs_threads = 16;
1789                adev->gfx.config.max_hw_contexts = 8;
1790
1791                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1792                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1793                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1794                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1795                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1796                break;
1797        default:
1798                adev->gfx.config.max_shader_engines = 2;
1799                adev->gfx.config.max_tile_pipes = 4;
1800                adev->gfx.config.max_cu_per_sh = 2;
1801                adev->gfx.config.max_sh_per_se = 1;
1802                adev->gfx.config.max_backends_per_se = 2;
1803                adev->gfx.config.max_texture_channel_caches = 4;
1804                adev->gfx.config.max_gprs = 256;
1805                adev->gfx.config.max_gs_threads = 32;
1806                adev->gfx.config.max_hw_contexts = 8;
1807
1808                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1809                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1810                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1811                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1812                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1813                break;
1814        }
1815
1816        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1817        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1818        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1819
1820        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1821        adev->gfx.config.mem_max_burst_length_bytes = 256;
1822        if (adev->flags & AMD_IS_APU) {
1823                /* Get memory bank mapping mode. */
1824                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1825                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1826                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1827
1828                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1829                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1830                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1831
1832                /* Validate settings in case only one DIMM installed. */
1833                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1834                        dimm00_addr_map = 0;
1835                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1836                        dimm01_addr_map = 0;
1837                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1838                        dimm10_addr_map = 0;
1839                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1840                        dimm11_addr_map = 0;
1841
1842                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1843                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1844                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1845                        adev->gfx.config.mem_row_size_in_kb = 2;
1846                else
1847                        adev->gfx.config.mem_row_size_in_kb = 1;
1848        } else {
1849                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1850                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1851                if (adev->gfx.config.mem_row_size_in_kb > 4)
1852                        adev->gfx.config.mem_row_size_in_kb = 4;
1853        }
1854
1855        adev->gfx.config.shader_engine_tile_size = 32;
1856        adev->gfx.config.num_gpus = 1;
1857        adev->gfx.config.multi_gpu_tile_size = 64;
1858
1859        /* fix up row size */
1860        switch (adev->gfx.config.mem_row_size_in_kb) {
1861        case 1:
1862        default:
1863                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1864                break;
1865        case 2:
1866                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1867                break;
1868        case 4:
1869                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1870                break;
1871        }
1872        adev->gfx.config.gb_addr_config = gb_addr_config;
1873
1874        return 0;
1875}
1876
1877static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1878                                        int mec, int pipe, int queue)
1879{
1880        int r;
1881        unsigned irq_type;
1882        struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1883
1884        ring = &adev->gfx.compute_ring[ring_id];
1885
1886        /* mec0 is me1 */
1887        ring->me = mec + 1;
1888        ring->pipe = pipe;
1889        ring->queue = queue;
1890
1891        ring->ring_obj = NULL;
1892        ring->use_doorbell = true;
1893        ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1894        ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1895                                + (ring_id * GFX8_MEC_HPD_SIZE);
1896        sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1897
1898        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1899                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1900                + ring->pipe;
1901
1902        /* type-2 packets are deprecated on MEC, use type-3 instead */
1903        r = amdgpu_ring_init(adev, ring, 1024,
1904                        &adev->gfx.eop_irq, irq_type);
1905        if (r)
1906                return r;
1907
1908
1909        return 0;
1910}
1911
1912static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1913
1914static int gfx_v8_0_sw_init(void *handle)
1915{
1916        int i, j, k, r, ring_id;
1917        struct amdgpu_ring *ring;
1918        struct amdgpu_kiq *kiq;
1919        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1920
1921        switch (adev->asic_type) {
1922        case CHIP_TONGA:
1923        case CHIP_CARRIZO:
1924        case CHIP_FIJI:
1925        case CHIP_POLARIS10:
1926        case CHIP_POLARIS11:
1927        case CHIP_POLARIS12:
1928        case CHIP_VEGAM:
1929                adev->gfx.mec.num_mec = 2;
1930                break;
1931        case CHIP_TOPAZ:
1932        case CHIP_STONEY:
1933        default:
1934                adev->gfx.mec.num_mec = 1;
1935                break;
1936        }
1937
1938        adev->gfx.mec.num_pipe_per_mec = 4;
1939        adev->gfx.mec.num_queue_per_pipe = 8;
1940
1941        /* EOP Event */
1942        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1943        if (r)
1944                return r;
1945
1946        /* Privileged reg */
1947        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1948                              &adev->gfx.priv_reg_irq);
1949        if (r)
1950                return r;
1951
1952        /* Privileged inst */
1953        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1954                              &adev->gfx.priv_inst_irq);
1955        if (r)
1956                return r;
1957
1958        /* Add CP EDC/ECC irq  */
1959        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1960                              &adev->gfx.cp_ecc_error_irq);
1961        if (r)
1962                return r;
1963
1964        /* SQ interrupts. */
1965        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1966                              &adev->gfx.sq_irq);
1967        if (r) {
1968                DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1969                return r;
1970        }
1971
1972        INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1973
1974        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1975
1976        gfx_v8_0_scratch_init(adev);
1977
1978        r = gfx_v8_0_init_microcode(adev);
1979        if (r) {
1980                DRM_ERROR("Failed to load gfx firmware!\n");
1981                return r;
1982        }
1983
1984        r = adev->gfx.rlc.funcs->init(adev);
1985        if (r) {
1986                DRM_ERROR("Failed to init rlc BOs!\n");
1987                return r;
1988        }
1989
1990        r = gfx_v8_0_mec_init(adev);
1991        if (r) {
1992                DRM_ERROR("Failed to init MEC BOs!\n");
1993                return r;
1994        }
1995
1996        /* set up the gfx ring */
1997        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1998                ring = &adev->gfx.gfx_ring[i];
1999                ring->ring_obj = NULL;
2000                sprintf(ring->name, "gfx");
2001                /* no gfx doorbells on iceland */
2002                if (adev->asic_type != CHIP_TOPAZ) {
2003                        ring->use_doorbell = true;
2004                        ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2005                }
2006
2007                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2008                                     AMDGPU_CP_IRQ_GFX_EOP);
2009                if (r)
2010                        return r;
2011        }
2012
2013
2014        /* set up the compute queues - allocate horizontally across pipes */
2015        ring_id = 0;
2016        for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2017                for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2018                        for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2019                                if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2020                                        continue;
2021
2022                                r = gfx_v8_0_compute_ring_init(adev,
2023                                                                ring_id,
2024                                                                i, k, j);
2025                                if (r)
2026                                        return r;
2027
2028                                ring_id++;
2029                        }
2030                }
2031        }
2032
2033        r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2034        if (r) {
2035                DRM_ERROR("Failed to init KIQ BOs!\n");
2036                return r;
2037        }
2038
2039        kiq = &adev->gfx.kiq;
2040        r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2041        if (r)
2042                return r;
2043
2044        /* create MQD for all compute queues as well as KIQ for SRIOV case */
2045        r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2046        if (r)
2047                return r;
2048
2049        adev->gfx.ce_ram_size = 0x8000;
2050
2051        r = gfx_v8_0_gpu_early_init(adev);
2052        if (r)
2053                return r;
2054
2055        return 0;
2056}
2057
2058static int gfx_v8_0_sw_fini(void *handle)
2059{
2060        int i;
2061        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2062
2063        amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2064        amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2065        amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2066
2067        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2068                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2069        for (i = 0; i < adev->gfx.num_compute_rings; i++)
2070                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2071
2072        amdgpu_gfx_compute_mqd_sw_fini(adev);
2073        amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2074        amdgpu_gfx_kiq_fini(adev);
2075
2076        gfx_v8_0_mec_fini(adev);
2077        amdgpu_gfx_rlc_fini(adev);
2078        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2079                                &adev->gfx.rlc.clear_state_gpu_addr,
2080                                (void **)&adev->gfx.rlc.cs_ptr);
2081        if ((adev->asic_type == CHIP_CARRIZO) ||
2082            (adev->asic_type == CHIP_STONEY)) {
2083                amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2084                                &adev->gfx.rlc.cp_table_gpu_addr,
2085                                (void **)&adev->gfx.rlc.cp_table_ptr);
2086        }
2087        gfx_v8_0_free_microcode(adev);
2088
2089        return 0;
2090}
2091
2092static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2093{
2094        uint32_t *modearray, *mod2array;
2095        const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2096        const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2097        u32 reg_offset;
2098
2099        modearray = adev->gfx.config.tile_mode_array;
2100        mod2array = adev->gfx.config.macrotile_mode_array;
2101
2102        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2103                modearray[reg_offset] = 0;
2104
2105        for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2106                mod2array[reg_offset] = 0;
2107
2108        switch (adev->asic_type) {
2109        case CHIP_TOPAZ:
2110                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2111                                PIPE_CONFIG(ADDR_SURF_P2) |
2112                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2113                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2114                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115                                PIPE_CONFIG(ADDR_SURF_P2) |
2116                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2117                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2118                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119                                PIPE_CONFIG(ADDR_SURF_P2) |
2120                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2121                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                PIPE_CONFIG(ADDR_SURF_P2) |
2124                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2125                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                                PIPE_CONFIG(ADDR_SURF_P2) |
2128                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2129                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131                                PIPE_CONFIG(ADDR_SURF_P2) |
2132                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2133                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2135                                PIPE_CONFIG(ADDR_SURF_P2) |
2136                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2139                                PIPE_CONFIG(ADDR_SURF_P2));
2140                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141                                PIPE_CONFIG(ADDR_SURF_P2) |
2142                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2143                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                 PIPE_CONFIG(ADDR_SURF_P2) |
2146                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2147                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2152                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2) |
2154                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2155                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2159                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2160                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2) |
2166                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2168                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2175                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2176                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2177                                 PIPE_CONFIG(ADDR_SURF_P2) |
2178                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2179                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2) |
2182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2185                                 PIPE_CONFIG(ADDR_SURF_P2) |
2186                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2189                                 PIPE_CONFIG(ADDR_SURF_P2) |
2190                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2193                                 PIPE_CONFIG(ADDR_SURF_P2) |
2194                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2197                                 PIPE_CONFIG(ADDR_SURF_P2) |
2198                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2199                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2201                                 PIPE_CONFIG(ADDR_SURF_P2) |
2202                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2203                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205                                 PIPE_CONFIG(ADDR_SURF_P2) |
2206                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2207                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2208                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2209                                 PIPE_CONFIG(ADDR_SURF_P2) |
2210                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2212
2213                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2214                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2215                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2216                                NUM_BANKS(ADDR_SURF_8_BANK));
2217                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2218                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2220                                NUM_BANKS(ADDR_SURF_8_BANK));
2221                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2222                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224                                NUM_BANKS(ADDR_SURF_8_BANK));
2225                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2228                                NUM_BANKS(ADDR_SURF_8_BANK));
2229                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232                                NUM_BANKS(ADDR_SURF_8_BANK));
2233                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2236                                NUM_BANKS(ADDR_SURF_8_BANK));
2237                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                NUM_BANKS(ADDR_SURF_8_BANK));
2241                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2242                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2243                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2244                                NUM_BANKS(ADDR_SURF_16_BANK));
2245                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2246                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2248                                NUM_BANKS(ADDR_SURF_16_BANK));
2249                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2250                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                 NUM_BANKS(ADDR_SURF_16_BANK));
2253                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256                                 NUM_BANKS(ADDR_SURF_16_BANK));
2257                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2259                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260                                 NUM_BANKS(ADDR_SURF_16_BANK));
2261                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264                                 NUM_BANKS(ADDR_SURF_16_BANK));
2265                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2268                                 NUM_BANKS(ADDR_SURF_8_BANK));
2269
2270                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2271                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2272                            reg_offset != 23)
2273                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2274
2275                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2276                        if (reg_offset != 7)
2277                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2278
2279                break;
2280        case CHIP_FIJI:
2281        case CHIP_VEGAM:
2282                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2285                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2286                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2289                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2290                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2293                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2297                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2301                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2303                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2305                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2311                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2312                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2315                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2316                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2317                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2319                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2323                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2324                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2328                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2329                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2332                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2335                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2339                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2341                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2348                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2353                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2356                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2357                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2359                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2360                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2361                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2363                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2365                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2369                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2373                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2377                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2381                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2385                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2389                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2391                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2395                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2397                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2400                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2404
2405                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2407                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2408                                NUM_BANKS(ADDR_SURF_8_BANK));
2409                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2411                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2412                                NUM_BANKS(ADDR_SURF_8_BANK));
2413                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416                                NUM_BANKS(ADDR_SURF_8_BANK));
2417                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420                                NUM_BANKS(ADDR_SURF_8_BANK));
2421                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2423                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                NUM_BANKS(ADDR_SURF_8_BANK));
2425                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2428                                NUM_BANKS(ADDR_SURF_8_BANK));
2429                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2431                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432                                NUM_BANKS(ADDR_SURF_8_BANK));
2433                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2435                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                NUM_BANKS(ADDR_SURF_8_BANK));
2437                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                NUM_BANKS(ADDR_SURF_8_BANK));
2441                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                 NUM_BANKS(ADDR_SURF_8_BANK));
2453                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                                 NUM_BANKS(ADDR_SURF_8_BANK));
2457                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                                 NUM_BANKS(ADDR_SURF_4_BANK));
2461
2462                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2463                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2464
2465                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2466                        if (reg_offset != 7)
2467                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2468
2469                break;
2470        case CHIP_TONGA:
2471                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2474                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2475                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2478                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2479                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2482                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2486                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2490                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2492                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2494                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2500                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2501                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2504                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2505                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2506                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2508                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2512                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2517                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2519                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2521                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2524                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2530                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2534                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2537                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2542                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2545                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2546                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2548                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2549                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2550                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2552                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2554                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2558                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2562                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2566                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2570                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2574                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2578                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2586                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2589                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2590                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2593
2594                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2596                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2597                                NUM_BANKS(ADDR_SURF_16_BANK));
2598                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2600                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2601                                NUM_BANKS(ADDR_SURF_16_BANK));
2602                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605                                NUM_BANKS(ADDR_SURF_16_BANK));
2606                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609                                NUM_BANKS(ADDR_SURF_16_BANK));
2610                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2612                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2613                                NUM_BANKS(ADDR_SURF_16_BANK));
2614                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2616                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2617                                NUM_BANKS(ADDR_SURF_16_BANK));
2618                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2620                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2621                                NUM_BANKS(ADDR_SURF_16_BANK));
2622                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2624                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                NUM_BANKS(ADDR_SURF_16_BANK));
2626                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                NUM_BANKS(ADDR_SURF_16_BANK));
2630                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641                                 NUM_BANKS(ADDR_SURF_8_BANK));
2642                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645                                 NUM_BANKS(ADDR_SURF_4_BANK));
2646                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                                 NUM_BANKS(ADDR_SURF_4_BANK));
2650
2651                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2652                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2653
2654                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2655                        if (reg_offset != 7)
2656                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2657
2658                break;
2659        case CHIP_POLARIS11:
2660        case CHIP_POLARIS12:
2661                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2664                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2665                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2668                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2669                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2676                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2680                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2682                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2684                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2690                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2694                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
2695                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2696                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2707                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2711                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2720                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2727                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2732                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2735                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2736                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2738                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2739                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2740                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2742                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2744                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2748                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2752                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2756                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2760                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2764                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2768                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2770                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2771                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2772                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2774                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2775                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2776                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2779                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2783
2784                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2786                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2787                                NUM_BANKS(ADDR_SURF_16_BANK));
2788
2789                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2791                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792                                NUM_BANKS(ADDR_SURF_16_BANK));
2793
2794                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2796                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2797                                NUM_BANKS(ADDR_SURF_16_BANK));
2798
2799                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2801                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2802                                NUM_BANKS(ADDR_SURF_16_BANK));
2803
2804                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2807                                NUM_BANKS(ADDR_SURF_16_BANK));
2808
2809                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2812                                NUM_BANKS(ADDR_SURF_16_BANK));
2813
2814                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2815                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2816                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2817                                NUM_BANKS(ADDR_SURF_16_BANK));
2818
2819                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2820                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2821                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822                                NUM_BANKS(ADDR_SURF_16_BANK));
2823
2824                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2825                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827                                NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832                                NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837                                NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2842                                NUM_BANKS(ADDR_SURF_16_BANK));
2843
2844                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                NUM_BANKS(ADDR_SURF_8_BANK));
2848
2849                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2852                                NUM_BANKS(ADDR_SURF_4_BANK));
2853
2854                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2855                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2856
2857                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2858                        if (reg_offset != 7)
2859                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2860
2861                break;
2862        case CHIP_POLARIS10:
2863                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2866                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2867                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2870                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2871                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2874                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2878                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2882                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2884                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2886                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2892                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2896                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2897                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2900                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2901                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2904                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2906                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2909                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2914                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2916                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2920                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2922                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2929                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2931                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2934                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2937                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2938                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2940                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2941                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2942                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2944                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2946                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2950                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2954                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2958                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2962                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2966                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2970                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2973                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2974                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2977                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2981                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2982                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2983                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2985
2986                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2988                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2989                                NUM_BANKS(ADDR_SURF_16_BANK));
2990
2991                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                NUM_BANKS(ADDR_SURF_16_BANK));
2995
2996                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                NUM_BANKS(ADDR_SURF_16_BANK));
3000
3001                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3002                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3003                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3004                                NUM_BANKS(ADDR_SURF_16_BANK));
3005
3006                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3008                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3009                                NUM_BANKS(ADDR_SURF_16_BANK));
3010
3011                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3014                                NUM_BANKS(ADDR_SURF_16_BANK));
3015
3016                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3017                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3018                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3019                                NUM_BANKS(ADDR_SURF_16_BANK));
3020
3021                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3023                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024                                NUM_BANKS(ADDR_SURF_16_BANK));
3025
3026                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029                                NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3033                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3034                                NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3039                                NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3044                                NUM_BANKS(ADDR_SURF_8_BANK));
3045
3046                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3049                                NUM_BANKS(ADDR_SURF_4_BANK));
3050
3051                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054                                NUM_BANKS(ADDR_SURF_4_BANK));
3055
3056                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3057                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3058
3059                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3060                        if (reg_offset != 7)
3061                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3062
3063                break;
3064        case CHIP_STONEY:
3065                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066                                PIPE_CONFIG(ADDR_SURF_P2) |
3067                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3068                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3069                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070                                PIPE_CONFIG(ADDR_SURF_P2) |
3071                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3072                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3073                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074                                PIPE_CONFIG(ADDR_SURF_P2) |
3075                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3076                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                PIPE_CONFIG(ADDR_SURF_P2) |
3079                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3080                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                                PIPE_CONFIG(ADDR_SURF_P2) |
3083                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3084                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3086                                PIPE_CONFIG(ADDR_SURF_P2) |
3087                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3088                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3090                                PIPE_CONFIG(ADDR_SURF_P2) |
3091                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3094                                PIPE_CONFIG(ADDR_SURF_P2));
3095                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3096                                PIPE_CONFIG(ADDR_SURF_P2) |
3097                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3098                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3099                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3100                                 PIPE_CONFIG(ADDR_SURF_P2) |
3101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3102                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3103                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3107                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3108                                 PIPE_CONFIG(ADDR_SURF_P2) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3110                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112                                 PIPE_CONFIG(ADDR_SURF_P2) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3114                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3115                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3116                                 PIPE_CONFIG(ADDR_SURF_P2) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3120                                 PIPE_CONFIG(ADDR_SURF_P2) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3123                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3124                                 PIPE_CONFIG(ADDR_SURF_P2) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3127                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3130                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3131                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3132                                 PIPE_CONFIG(ADDR_SURF_P2) |
3133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3134                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3136                                 PIPE_CONFIG(ADDR_SURF_P2) |
3137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3140                                 PIPE_CONFIG(ADDR_SURF_P2) |
3141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3144                                 PIPE_CONFIG(ADDR_SURF_P2) |
3145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3148                                 PIPE_CONFIG(ADDR_SURF_P2) |
3149                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3152                                 PIPE_CONFIG(ADDR_SURF_P2) |
3153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3156                                 PIPE_CONFIG(ADDR_SURF_P2) |
3157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3158                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160                                 PIPE_CONFIG(ADDR_SURF_P2) |
3161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3162                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164                                 PIPE_CONFIG(ADDR_SURF_P2) |
3165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167
3168                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3170                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3171                                NUM_BANKS(ADDR_SURF_8_BANK));
3172                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3174                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3175                                NUM_BANKS(ADDR_SURF_8_BANK));
3176                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3178                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3179                                NUM_BANKS(ADDR_SURF_8_BANK));
3180                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3183                                NUM_BANKS(ADDR_SURF_8_BANK));
3184                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                NUM_BANKS(ADDR_SURF_8_BANK));
3188                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191                                NUM_BANKS(ADDR_SURF_8_BANK));
3192                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195                                NUM_BANKS(ADDR_SURF_8_BANK));
3196                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3197                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3198                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                NUM_BANKS(ADDR_SURF_16_BANK));
3200                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3201                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3202                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3203                                NUM_BANKS(ADDR_SURF_16_BANK));
3204                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                 NUM_BANKS(ADDR_SURF_16_BANK));
3208                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                                 NUM_BANKS(ADDR_SURF_16_BANK));
3212                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                 NUM_BANKS(ADDR_SURF_16_BANK));
3216                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                 NUM_BANKS(ADDR_SURF_16_BANK));
3220                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223                                 NUM_BANKS(ADDR_SURF_8_BANK));
3224
3225                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3226                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3227                            reg_offset != 23)
3228                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3229
3230                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3231                        if (reg_offset != 7)
3232                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3233
3234                break;
3235        default:
3236                dev_warn(adev->dev,
3237                         "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3238                         adev->asic_type);
3239                /* fall through */
3240
3241        case CHIP_CARRIZO:
3242                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3243                                PIPE_CONFIG(ADDR_SURF_P2) |
3244                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3245                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3246                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247                                PIPE_CONFIG(ADDR_SURF_P2) |
3248                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3249                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3250                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251                                PIPE_CONFIG(ADDR_SURF_P2) |
3252                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3253                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                PIPE_CONFIG(ADDR_SURF_P2) |
3256                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3257                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                                PIPE_CONFIG(ADDR_SURF_P2) |
3260                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3261                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3263                                PIPE_CONFIG(ADDR_SURF_P2) |
3264                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3265                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267                                PIPE_CONFIG(ADDR_SURF_P2) |
3268                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3271                                PIPE_CONFIG(ADDR_SURF_P2));
3272                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                PIPE_CONFIG(ADDR_SURF_P2) |
3274                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3275                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3276                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3277                                 PIPE_CONFIG(ADDR_SURF_P2) |
3278                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3279                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3280                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3281                                 PIPE_CONFIG(ADDR_SURF_P2) |
3282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3284                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3285                                 PIPE_CONFIG(ADDR_SURF_P2) |
3286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3287                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3289                                 PIPE_CONFIG(ADDR_SURF_P2) |
3290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3291                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3292                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3293                                 PIPE_CONFIG(ADDR_SURF_P2) |
3294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3297                                 PIPE_CONFIG(ADDR_SURF_P2) |
3298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3300                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3301                                 PIPE_CONFIG(ADDR_SURF_P2) |
3302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3304                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3305                                 PIPE_CONFIG(ADDR_SURF_P2) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3307                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3308                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3309                                 PIPE_CONFIG(ADDR_SURF_P2) |
3310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3311                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3313                                 PIPE_CONFIG(ADDR_SURF_P2) |
3314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3317                                 PIPE_CONFIG(ADDR_SURF_P2) |
3318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3321                                 PIPE_CONFIG(ADDR_SURF_P2) |
3322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3325                                 PIPE_CONFIG(ADDR_SURF_P2) |
3326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3329                                 PIPE_CONFIG(ADDR_SURF_P2) |
3330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3331                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3333                                 PIPE_CONFIG(ADDR_SURF_P2) |
3334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3335                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3336                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3337                                 PIPE_CONFIG(ADDR_SURF_P2) |
3338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3339                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3340                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3341                                 PIPE_CONFIG(ADDR_SURF_P2) |
3342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3344
3345                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3347                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348                                NUM_BANKS(ADDR_SURF_8_BANK));
3349                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3350                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3351                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3352                                NUM_BANKS(ADDR_SURF_8_BANK));
3353                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3355                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3356                                NUM_BANKS(ADDR_SURF_8_BANK));
3357                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360                                NUM_BANKS(ADDR_SURF_8_BANK));
3361                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364                                NUM_BANKS(ADDR_SURF_8_BANK));
3365                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368                                NUM_BANKS(ADDR_SURF_8_BANK));
3369                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372                                NUM_BANKS(ADDR_SURF_8_BANK));
3373                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3374                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3375                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376                                NUM_BANKS(ADDR_SURF_16_BANK));
3377                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3378                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3379                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3380                                NUM_BANKS(ADDR_SURF_16_BANK));
3381                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_16_BANK));
3385                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                 NUM_BANKS(ADDR_SURF_16_BANK));
3389                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3391                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392                                 NUM_BANKS(ADDR_SURF_16_BANK));
3393                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396                                 NUM_BANKS(ADDR_SURF_16_BANK));
3397                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                 NUM_BANKS(ADDR_SURF_8_BANK));
3401
3402                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3403                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3404                            reg_offset != 23)
3405                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3406
3407                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3408                        if (reg_offset != 7)
3409                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3410
3411                break;
3412        }
3413}
3414
3415static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3416                                  u32 se_num, u32 sh_num, u32 instance)
3417{
3418        u32 data;
3419
3420        if (instance == 0xffffffff)
3421                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3422        else
3423                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3424
3425        if (se_num == 0xffffffff)
3426                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3427        else
3428                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3429
3430        if (sh_num == 0xffffffff)
3431                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3432        else
3433                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3434
3435        WREG32(mmGRBM_GFX_INDEX, data);
3436}
3437
3438static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3439                                  u32 me, u32 pipe, u32 q)
3440{
3441        vi_srbm_select(adev, me, pipe, q, 0);
3442}
3443
3444static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3445{
3446        u32 data, mask;
3447
3448        data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3449                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3450
3451        data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3452
3453        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3454                                         adev->gfx.config.max_sh_per_se);
3455
3456        return (~data) & mask;
3457}
3458
3459static void
3460gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3461{
3462        switch (adev->asic_type) {
3463        case CHIP_FIJI:
3464        case CHIP_VEGAM:
3465                *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3466                          RB_XSEL2(1) | PKR_MAP(2) |
3467                          PKR_XSEL(1) | PKR_YSEL(1) |
3468                          SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3469                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3470                           SE_PAIR_YSEL(2);
3471                break;
3472        case CHIP_TONGA:
3473        case CHIP_POLARIS10:
3474                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3475                          SE_XSEL(1) | SE_YSEL(1);
3476                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3477                           SE_PAIR_YSEL(2);
3478                break;
3479        case CHIP_TOPAZ:
3480        case CHIP_CARRIZO:
3481                *rconf |= RB_MAP_PKR0(2);
3482                *rconf1 |= 0x0;
3483                break;
3484        case CHIP_POLARIS11:
3485        case CHIP_POLARIS12:
3486                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3487                          SE_XSEL(1) | SE_YSEL(1);
3488                *rconf1 |= 0x0;
3489                break;
3490        case CHIP_STONEY:
3491                *rconf |= 0x0;
3492                *rconf1 |= 0x0;
3493                break;
3494        default:
3495                DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3496                break;
3497        }
3498}
3499
3500static void
3501gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3502                                        u32 raster_config, u32 raster_config_1,
3503                                        unsigned rb_mask, unsigned num_rb)
3504{
3505        unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3506        unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3507        unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3508        unsigned rb_per_se = num_rb / num_se;
3509        unsigned se_mask[4];
3510        unsigned se;
3511
3512        se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3513        se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3514        se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3515        se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3516
3517        WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3518        WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3519        WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3520
3521        if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3522                             (!se_mask[2] && !se_mask[3]))) {
3523                raster_config_1 &= ~SE_PAIR_MAP_MASK;
3524
3525                if (!se_mask[0] && !se_mask[1]) {
3526                        raster_config_1 |=
3527                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3528                } else {
3529                        raster_config_1 |=
3530                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3531                }
3532        }
3533
3534        for (se = 0; se < num_se; se++) {
3535                unsigned raster_config_se = raster_config;
3536                unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3537                unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3538                int idx = (se / 2) * 2;
3539
3540                if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3541                        raster_config_se &= ~SE_MAP_MASK;
3542
3543                        if (!se_mask[idx]) {
3544                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3545                        } else {
3546                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3547                        }
3548                }
3549
3550                pkr0_mask &= rb_mask;
3551                pkr1_mask &= rb_mask;
3552                if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3553                        raster_config_se &= ~PKR_MAP_MASK;
3554
3555                        if (!pkr0_mask) {
3556                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3557                        } else {
3558                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3559                        }
3560                }
3561
3562                if (rb_per_se >= 2) {
3563                        unsigned rb0_mask = 1 << (se * rb_per_se);
3564                        unsigned rb1_mask = rb0_mask << 1;
3565
3566                        rb0_mask &= rb_mask;
3567                        rb1_mask &= rb_mask;
3568                        if (!rb0_mask || !rb1_mask) {
3569                                raster_config_se &= ~RB_MAP_PKR0_MASK;
3570
3571                                if (!rb0_mask) {
3572                                        raster_config_se |=
3573                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3574                                } else {
3575                                        raster_config_se |=
3576                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3577                                }
3578                        }
3579
3580                        if (rb_per_se > 2) {
3581                                rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3582                                rb1_mask = rb0_mask << 1;
3583                                rb0_mask &= rb_mask;
3584                                rb1_mask &= rb_mask;
3585                                if (!rb0_mask || !rb1_mask) {
3586                                        raster_config_se &= ~RB_MAP_PKR1_MASK;
3587
3588                                        if (!rb0_mask) {
3589                                                raster_config_se |=
3590                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3591                                        } else {
3592                                                raster_config_se |=
3593                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3594                                        }
3595                                }
3596                        }
3597                }
3598
3599                /* GRBM_GFX_INDEX has a different offset on VI */
3600                gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3601                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3602                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3603        }
3604
3605        /* GRBM_GFX_INDEX has a different offset on VI */
3606        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3607}
3608
3609static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3610{
3611        int i, j;
3612        u32 data;
3613        u32 raster_config = 0, raster_config_1 = 0;
3614        u32 active_rbs = 0;
3615        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3616                                        adev->gfx.config.max_sh_per_se;
3617        unsigned num_rb_pipes;
3618
3619        mutex_lock(&adev->grbm_idx_mutex);
3620        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3621                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3622                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3623                        data = gfx_v8_0_get_rb_active_bitmap(adev);
3624                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3625                                               rb_bitmap_width_per_sh);
3626                }
3627        }
3628        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3629
3630        adev->gfx.config.backend_enable_mask = active_rbs;
3631        adev->gfx.config.num_rbs = hweight32(active_rbs);
3632
3633        num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3634                             adev->gfx.config.max_shader_engines, 16);
3635
3636        gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3637
3638        if (!adev->gfx.config.backend_enable_mask ||
3639                        adev->gfx.config.num_rbs >= num_rb_pipes) {
3640                WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3641                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3642        } else {
3643                gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3644                                                        adev->gfx.config.backend_enable_mask,
3645                                                        num_rb_pipes);
3646        }
3647
3648        /* cache the values for userspace */
3649        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3650                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3651                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3652                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
3653                                RREG32(mmCC_RB_BACKEND_DISABLE);
3654                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3655                                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3656                        adev->gfx.config.rb_config[i][j].raster_config =
3657                                RREG32(mmPA_SC_RASTER_CONFIG);
3658                        adev->gfx.config.rb_config[i][j].raster_config_1 =
3659                                RREG32(mmPA_SC_RASTER_CONFIG_1);
3660                }
3661        }
3662        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3663        mutex_unlock(&adev->grbm_idx_mutex);
3664}
3665
3666/**
3667 * gfx_v8_0_init_compute_vmid - gart enable
3668 *
3669 * @adev: amdgpu_device pointer
3670 *
3671 * Initialize compute vmid sh_mem registers
3672 *
3673 */
3674#define DEFAULT_SH_MEM_BASES    (0x6000)
3675#define FIRST_COMPUTE_VMID      (8)
3676#define LAST_COMPUTE_VMID       (16)
3677static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3678{
3679        int i;
3680        uint32_t sh_mem_config;
3681        uint32_t sh_mem_bases;
3682
3683        /*
3684         * Configure apertures:
3685         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3686         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3687         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3688         */
3689        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3690
3691        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3692                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3693                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3694                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3695                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3696                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3697
3698        mutex_lock(&adev->srbm_mutex);
3699        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3700                vi_srbm_select(adev, 0, 0, 0, i);
3701                /* CP and shaders */
3702                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3703                WREG32(mmSH_MEM_APE1_BASE, 1);
3704                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3705                WREG32(mmSH_MEM_BASES, sh_mem_bases);
3706        }
3707        vi_srbm_select(adev, 0, 0, 0, 0);
3708        mutex_unlock(&adev->srbm_mutex);
3709}
3710
3711static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3712{
3713        switch (adev->asic_type) {
3714        default:
3715                adev->gfx.config.double_offchip_lds_buf = 1;
3716                break;
3717        case CHIP_CARRIZO:
3718        case CHIP_STONEY:
3719                adev->gfx.config.double_offchip_lds_buf = 0;
3720                break;
3721        }
3722}
3723
3724static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3725{
3726        u32 tmp, sh_static_mem_cfg;
3727        int i;
3728
3729        WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3730        WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3731        WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3732        WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3733
3734        gfx_v8_0_tiling_mode_table_init(adev);
3735        gfx_v8_0_setup_rb(adev);
3736        gfx_v8_0_get_cu_info(adev);
3737        gfx_v8_0_config_init(adev);
3738
3739        /* XXX SH_MEM regs */
3740        /* where to put LDS, scratch, GPUVM in FSA64 space */
3741        sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3742                                   SWIZZLE_ENABLE, 1);
3743        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3744                                   ELEMENT_SIZE, 1);
3745        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3746                                   INDEX_STRIDE, 3);
3747        WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3748
3749        mutex_lock(&adev->srbm_mutex);
3750        for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3751                vi_srbm_select(adev, 0, 0, 0, i);
3752                /* CP and shaders */
3753                if (i == 0) {
3754                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3755                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3756                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3757                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3758                        WREG32(mmSH_MEM_CONFIG, tmp);
3759                        WREG32(mmSH_MEM_BASES, 0);
3760                } else {
3761                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3762                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3763                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3764                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3765                        WREG32(mmSH_MEM_CONFIG, tmp);
3766                        tmp = adev->gmc.shared_aperture_start >> 48;
3767                        WREG32(mmSH_MEM_BASES, tmp);
3768                }
3769
3770                WREG32(mmSH_MEM_APE1_BASE, 1);
3771                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3772        }
3773        vi_srbm_select(adev, 0, 0, 0, 0);
3774        mutex_unlock(&adev->srbm_mutex);
3775
3776        gfx_v8_0_init_compute_vmid(adev);
3777
3778        mutex_lock(&adev->grbm_idx_mutex);
3779        /*
3780         * making sure that the following register writes will be broadcasted
3781         * to all the shaders
3782         */
3783        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3784
3785        WREG32(mmPA_SC_FIFO_SIZE,
3786                   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3787                        PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3788                   (adev->gfx.config.sc_prim_fifo_size_backend <<
3789                        PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3790                   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3791                        PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3792                   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3793                        PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3794
3795        tmp = RREG32(mmSPI_ARB_PRIORITY);
3796        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3797        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3798        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3799        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3800        WREG32(mmSPI_ARB_PRIORITY, tmp);
3801
3802        mutex_unlock(&adev->grbm_idx_mutex);
3803
3804}
3805
3806static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3807{
3808        u32 i, j, k;
3809        u32 mask;
3810
3811        mutex_lock(&adev->grbm_idx_mutex);
3812        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3813                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3814                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3815                        for (k = 0; k < adev->usec_timeout; k++) {
3816                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3817                                        break;
3818                                udelay(1);
3819                        }
3820                        if (k == adev->usec_timeout) {
3821                                gfx_v8_0_select_se_sh(adev, 0xffffffff,
3822                                                      0xffffffff, 0xffffffff);
3823                                mutex_unlock(&adev->grbm_idx_mutex);
3824                                DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3825                                         i, j);
3826                                return;
3827                        }
3828                }
3829        }
3830        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3831        mutex_unlock(&adev->grbm_idx_mutex);
3832
3833        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3834                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3835                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3836                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3837        for (k = 0; k < adev->usec_timeout; k++) {
3838                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3839                        break;
3840                udelay(1);
3841        }
3842}
3843
3844static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3845                                               bool enable)
3846{
3847        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3848
3849        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3850        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3851        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3852        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3853
3854        WREG32(mmCP_INT_CNTL_RING0, tmp);
3855}
3856
3857static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3858{
3859        /* csib */
3860        WREG32(mmRLC_CSIB_ADDR_HI,
3861                        adev->gfx.rlc.clear_state_gpu_addr >> 32);
3862        WREG32(mmRLC_CSIB_ADDR_LO,
3863                        adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3864        WREG32(mmRLC_CSIB_LENGTH,
3865                        adev->gfx.rlc.clear_state_size);
3866}
3867
/*
 * Walk the indirect part of a register list and rewrite it in place.
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset:           index of the first word to parse
 * @list_size:            number of words in @register_list_format
 * @unique_indices:       out: distinct index values found so far
 * @indices_count:        in/out: number of used @unique_indices slots
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    out: start offset of every sub-list
 * @offset_count:         in/out: number of used @ind_start_offsets slots
 * @max_offset:           capacity of @ind_start_offsets
 *
 * The list is a sequence of sub-lists separated by 0xFFFFFFFF words.
 * The start offset of each sub-list is recorded.  Inside a sub-list the
 * parser consumes three words per entry and replaces the third word by
 * its position in @unique_indices, appending the value there if it has
 * not been seen before.
 *
 * Reaching the capacity of either output array triggers BUG_ON().  An
 * entry cut short by the end of the list triggers WARN_ON() and stops
 * the parse instead of touching memory beyond @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
                                int ind_offset,
                                int list_size,
                                int *unique_indices,
                                int *indices_count,
                                int max_indices,
                                int *ind_start_offsets,
                                int *offset_count,
                                int max_offset)
{
        int indices;
        bool new_entry = true;

        for (; ind_offset < list_size; ind_offset++) {

                if (new_entry) {
                        new_entry = false;
                        ind_start_offsets[*offset_count] = ind_offset;
                        *offset_count = *offset_count + 1;
                        BUG_ON(*offset_count >= max_offset);
                }

                /* separator: the next word starts a new sub-list */
                if (register_list_format[ind_offset] == 0xFFFFFFFF) {
                        new_entry = true;
                        continue;
                }

                /* skip to the third word of this entry */
                ind_offset += 2;

                /*
                 * A truncated trailing entry would otherwise make the
                 * accesses below read and write past the buffer.
                 */
                if (WARN_ON(ind_offset >= list_size))
                        break;

                /* look for the matching indice */
                for (indices = 0;
                        indices < *indices_count;
                        indices++) {
                        if (unique_indices[indices] ==
                                register_list_format[ind_offset])
                                break;
                }

                /* not seen before: append it to the unique list */
                if (indices >= *indices_count) {
                        unique_indices[*indices_count] =
                                register_list_format[ind_offset];
                        indices = *indices_count;
                        *indices_count = *indices_count + 1;
                        BUG_ON(*indices_count >= max_indices);
                }

                /* replace the raw value by its slot number */
                register_list_format[ind_offset] = indices;
        }
}
3917
3918static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3919{
3920        int i, temp, data;
3921        int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3922        int indices_count = 0;
3923        int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3924        int offset_count = 0;
3925
3926        int list_size;
3927        unsigned int *register_list_format =
3928                kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3929        if (!register_list_format)
3930                return -ENOMEM;
3931        memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3932                        adev->gfx.rlc.reg_list_format_size_bytes);
3933
3934        gfx_v8_0_parse_ind_reg_list(register_list_format,
3935                                RLC_FormatDirectRegListLength,
3936                                adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3937                                unique_indices,
3938                                &indices_count,
3939                                ARRAY_SIZE(unique_indices),
3940                                indirect_start_offsets,
3941                                &offset_count,
3942                                ARRAY_SIZE(indirect_start_offsets));
3943
3944        /* save and restore list */
3945        WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3946
3947        WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3948        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3949                WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3950
3951        /* indirect list */
3952        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3953        for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3954                WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3955
3956        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3957        list_size = list_size >> 1;
3958        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3959        WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3960
3961        /* starting offsets starts */
3962        WREG32(mmRLC_GPM_SCRATCH_ADDR,
3963                adev->gfx.rlc.starting_offsets_start);
3964        for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3965                WREG32(mmRLC_GPM_SCRATCH_DATA,
3966                                indirect_start_offsets[i]);
3967
3968        /* unique indices */
3969        temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3970        data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3971        for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3972                if (unique_indices[i] != 0) {
3973                        WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3974                        WREG32(data + i, unique_indices[i] >> 20);
3975                }
3976        }
3977        kfree(register_list_format);
3978
3979        return 0;
3980}
3981
3982static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3983{
3984        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3985}
3986
3987static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3988{
3989        uint32_t data;
3990
3991        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3992
3993        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3994        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3995        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3996        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3997        WREG32(mmRLC_PG_DELAY, data);
3998
3999        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4000        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4001
4002}
4003
4004static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4005                                                bool enable)
4006{
4007        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4008}
4009
4010static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4011                                                  bool enable)
4012{
4013        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4014}
4015
4016static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4017{
4018        WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4019}
4020
4021static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4022{
4023        if ((adev->asic_type == CHIP_CARRIZO) ||
4024            (adev->asic_type == CHIP_STONEY)) {
4025                gfx_v8_0_init_csb(adev);
4026                gfx_v8_0_init_save_restore_list(adev);
4027                gfx_v8_0_enable_save_restore_machine(adev);
4028                WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4029                gfx_v8_0_init_power_gating(adev);
4030                WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4031        } else if ((adev->asic_type == CHIP_POLARIS11) ||
4032                   (adev->asic_type == CHIP_POLARIS12) ||
4033                   (adev->asic_type == CHIP_VEGAM)) {
4034                gfx_v8_0_init_csb(adev);
4035                gfx_v8_0_init_save_restore_list(adev);
4036                gfx_v8_0_enable_save_restore_machine(adev);
4037                gfx_v8_0_init_power_gating(adev);
4038        }
4039
4040}
4041
4042static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4043{
4044        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4045
4046        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4047        gfx_v8_0_wait_for_rlc_serdes(adev);
4048}
4049
4050static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4051{
4052        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4053        udelay(50);
4054
4055        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4056        udelay(50);
4057}
4058
4059static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4060{
4061        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4062
4063        /* carrizo do enable cp interrupt after cp inited */
4064        if (!(adev->flags & AMD_IS_APU))
4065                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4066
4067        udelay(50);
4068}
4069
4070static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4071{
4072        if (amdgpu_sriov_vf(adev)) {
4073                gfx_v8_0_init_csb(adev);
4074                return 0;
4075        }
4076
4077        adev->gfx.rlc.funcs->stop(adev);
4078        adev->gfx.rlc.funcs->reset(adev);
4079        gfx_v8_0_init_pg(adev);
4080        adev->gfx.rlc.funcs->start(adev);
4081
4082        return 0;
4083}
4084
4085static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4086{
4087        int i;
4088        u32 tmp = RREG32(mmCP_ME_CNTL);
4089
4090        if (enable) {
4091                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4092                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4093                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4094        } else {
4095                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4096                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4097                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4098                for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4099                        adev->gfx.gfx_ring[i].sched.ready = false;
4100        }
4101        WREG32(mmCP_ME_CNTL, tmp);
4102        udelay(50);
4103}
4104
4105static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4106{
4107        u32 count = 0;
4108        const struct cs_section_def *sect = NULL;
4109        const struct cs_extent_def *ext = NULL;
4110
4111        /* begin clear state */
4112        count += 2;
4113        /* context control state */
4114        count += 3;
4115
4116        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4117                for (ext = sect->section; ext->extent != NULL; ++ext) {
4118                        if (sect->id == SECT_CONTEXT)
4119                                count += 2 + ext->reg_count;
4120                        else
4121                                return 0;
4122                }
4123        }
4124        /* pa_sc_raster_config/pa_sc_raster_config1 */
4125        count += 4;
4126        /* end clear state */
4127        count += 2;
4128        /* clear state */
4129        count += 2;
4130
4131        return count;
4132}
4133
4134static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4135{
4136        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4137        const struct cs_section_def *sect = NULL;
4138        const struct cs_extent_def *ext = NULL;
4139        int r, i;
4140
4141        /* init the CP */
4142        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4143        WREG32(mmCP_ENDIAN_SWAP, 0);
4144        WREG32(mmCP_DEVICE_ID, 1);
4145
4146        gfx_v8_0_cp_gfx_enable(adev, true);
4147
4148        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4149        if (r) {
4150                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4151                return r;
4152        }
4153
4154        /* clear state buffer */
4155        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4156        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4157
4158        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4159        amdgpu_ring_write(ring, 0x80000000);
4160        amdgpu_ring_write(ring, 0x80000000);
4161
4162        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4163                for (ext = sect->section; ext->extent != NULL; ++ext) {
4164                        if (sect->id == SECT_CONTEXT) {
4165                                amdgpu_ring_write(ring,
4166                                       PACKET3(PACKET3_SET_CONTEXT_REG,
4167                                               ext->reg_count));
4168                                amdgpu_ring_write(ring,
4169                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4170                                for (i = 0; i < ext->reg_count; i++)
4171                                        amdgpu_ring_write(ring, ext->extent[i]);
4172                        }
4173                }
4174        }
4175
4176        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4177        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4178        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4179        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4180
4181        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4182        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4183
4184        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4185        amdgpu_ring_write(ring, 0);
4186
4187        /* init the CE partitions */
4188        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4189        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4190        amdgpu_ring_write(ring, 0x8000);
4191        amdgpu_ring_write(ring, 0x8000);
4192
4193        amdgpu_ring_commit(ring);
4194
4195        return 0;
4196}
4197static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4198{
4199        u32 tmp;
4200        /* no gfx doorbells on iceland */
4201        if (adev->asic_type == CHIP_TOPAZ)
4202                return;
4203
4204        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4205
4206        if (ring->use_doorbell) {
4207                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4208                                DOORBELL_OFFSET, ring->doorbell_index);
4209                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4210                                                DOORBELL_HIT, 0);
4211                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4212                                            DOORBELL_EN, 1);
4213        } else {
4214                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4215        }
4216
4217        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4218
4219        if (adev->flags & AMD_IS_APU)
4220                return;
4221
4222        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4223                                        DOORBELL_RANGE_LOWER,
4224                                        adev->doorbell_index.gfx_ring0);
4225        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4226
4227        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4228                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4229}
4230
4231static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4232{
4233        struct amdgpu_ring *ring;
4234        u32 tmp;
4235        u32 rb_bufsz;
4236        u64 rb_addr, rptr_addr, wptr_gpu_addr;
4237
4238        /* Set the write pointer delay */
4239        WREG32(mmCP_RB_WPTR_DELAY, 0);
4240
4241        /* set the RB to use vmid 0 */
4242        WREG32(mmCP_RB_VMID, 0);
4243
4244        /* Set ring buffer size */
4245        ring = &adev->gfx.gfx_ring[0];
4246        rb_bufsz = order_base_2(ring->ring_size / 8);
4247        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4248        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4249        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4250        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4251#ifdef __BIG_ENDIAN
4252        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4253#endif
4254        WREG32(mmCP_RB0_CNTL, tmp);
4255
4256        /* Initialize the ring buffer's read and write pointers */
4257        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4258        ring->wptr = 0;
4259        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4260
4261        /* set the wb address wether it's enabled or not */
4262        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4263        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4264        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4265
4266        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4267        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4268        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4269        mdelay(1);
4270        WREG32(mmCP_RB0_CNTL, tmp);
4271
4272        rb_addr = ring->gpu_addr >> 8;
4273        WREG32(mmCP_RB0_BASE, rb_addr);
4274        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4275
4276        gfx_v8_0_set_cpg_door_bell(adev, ring);
4277        /* start the ring */
4278        amdgpu_ring_clear_ring(ring);
4279        gfx_v8_0_cp_gfx_start(adev);
4280        ring->sched.ready = true;
4281
4282        return 0;
4283}
4284
4285static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4286{
4287        int i;
4288
4289        if (enable) {
4290                WREG32(mmCP_MEC_CNTL, 0);
4291        } else {
4292                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4293                for (i = 0; i < adev->gfx.num_compute_rings; i++)
4294                        adev->gfx.compute_ring[i].sched.ready = false;
4295                adev->gfx.kiq.ring.sched.ready = false;
4296        }
4297        udelay(50);
4298}
4299
4300/* KIQ functions */
4301static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4302{
4303        uint32_t tmp;
4304        struct amdgpu_device *adev = ring->adev;
4305
4306        /* tell RLC which is KIQ queue */
4307        tmp = RREG32(mmRLC_CP_SCHEDULERS);
4308        tmp &= 0xffffff00;
4309        tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4310        WREG32(mmRLC_CP_SCHEDULERS, tmp);
4311        tmp |= 0x80;
4312        WREG32(mmRLC_CP_SCHEDULERS, tmp);
4313}
4314
4315static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4316{
4317        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4318        uint64_t queue_mask = 0;
4319        int r, i;
4320
4321        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4322                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4323                        continue;
4324
4325                /* This situation may be hit in the future if a new HW
4326                 * generation exposes more than 64 queues. If so, the
4327                 * definition of queue_mask needs updating */
4328                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4329                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4330                        break;
4331                }
4332
4333                queue_mask |= (1ull << i);
4334        }
4335
4336        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4337        if (r) {
4338                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4339                return r;
4340        }
4341        /* set resources */
4342        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4343        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4344        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4345        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4346        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4347        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4348        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4349        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4350        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4351                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4352                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4353                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4354
4355                /* map queues */
4356                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4357                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4358                amdgpu_ring_write(kiq_ring,
4359                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4360                amdgpu_ring_write(kiq_ring,
4361                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4362                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4363                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4364                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4365                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4366                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4367                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4368                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4369        }
4370
4371        amdgpu_ring_commit(kiq_ring);
4372
4373        return 0;
4374}
4375
4376static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4377{
4378        int i, r = 0;
4379
4380        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4381                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4382                for (i = 0; i < adev->usec_timeout; i++) {
4383                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4384                                break;
4385                        udelay(1);
4386                }
4387                if (i == adev->usec_timeout)
4388                        r = -ETIMEDOUT;
4389        }
4390        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4391        WREG32(mmCP_HQD_PQ_RPTR, 0);
4392        WREG32(mmCP_HQD_PQ_WPTR, 0);
4393
4394        return r;
4395}
4396
/*
 * Fill in the memory queue descriptor (ring->mqd_ptr) for a compute/KIQ ring.
 *
 * Most fields are derived from the ring (addresses, size, doorbell) or are
 * fixed values; several others are seeded from the current contents of the
 * corresponding CP_HQD_* / CP_MQD_* registers via RREG32.  The callers
 * visible in this file select the target queue with vi_srbm_select() under
 * adev->srbm_mutex before calling this function.
 *
 * No HQD register is written here; the descriptor is only prepared in
 * memory.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
        /* address of the dynamic_cu_mask member inside the MQD allocation */
        mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        /* the EOP base is stored shifted right by 8 */
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        /* NOTE(review): this value is overwritten further down, where
         * cp_hqd_pq_doorbell_control is assigned again - confirm whether
         * this first assignment is still needed. */
        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                            CP_HQD_PQ_DOORBELL_CONTROL,
                            DOORBELL_EN,
                            ring->use_doorbell ? 1 : 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32(mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        /* NOTE(review): the value is pre-shifted by 8 before being handed
         * to REG_SET_FIELD, unlike the other fields set here - confirm the
         * extra shift is intended. */
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        /* doorbell control stays 0 when the ring does not use a doorbell */
        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_wptr = ring->wptr;
        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MTYPE */
        tmp = RREG32(mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ib_control = tmp;

        tmp = RREG32(mmCP_HQD_IQ_TIMER);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
        mqd->cp_hqd_iq_timer = tmp;

        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ctx_save_control = tmp;

        /* defaults: copied unchanged from the current register contents */
        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
        mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
        mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

        /* activate the queue */
        mqd->cp_hqd_active = 1;

        return 0;
}
4539
/*
 * Write a prepared MQD into the HQD registers.
 *
 * The descriptor is treated as an array of dwords starting at
 * cp_mqd_base_addr_lo, indexed by (register - mmCP_MQD_BASE_ADDR).  Wptr
 * polling is disabled first, then the register ranges are programmed in
 * three passes, the last of which ends at mmCP_HQD_ACTIVE.
 *
 * The caller visible in this file selects the target queue with
 * vi_srbm_select() under adev->srbm_mutex before calling.
 *
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        /* remaining registers from mmCP_HQD_EOP_EVENTS up to mmCP_HQD_ERROR */
        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4576
4577static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4578{
4579        struct amdgpu_device *adev = ring->adev;
4580        struct vi_mqd *mqd = ring->mqd_ptr;
4581        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4582
4583        gfx_v8_0_kiq_setting(ring);
4584
4585        if (adev->in_gpu_reset) { /* for GPU_RESET case */
4586                /* reset MQD to a clean status */
4587                if (adev->gfx.mec.mqd_backup[mqd_idx])
4588                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4589
4590                /* reset ring buffer */
4591                ring->wptr = 0;
4592                amdgpu_ring_clear_ring(ring);
4593                mutex_lock(&adev->srbm_mutex);
4594                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4595                gfx_v8_0_mqd_commit(adev, mqd);
4596                vi_srbm_select(adev, 0, 0, 0, 0);
4597                mutex_unlock(&adev->srbm_mutex);
4598        } else {
4599                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4600                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4601                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4602                mutex_lock(&adev->srbm_mutex);
4603                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4604                gfx_v8_0_mqd_init(ring);
4605                gfx_v8_0_mqd_commit(adev, mqd);
4606                vi_srbm_select(adev, 0, 0, 0, 0);
4607                mutex_unlock(&adev->srbm_mutex);
4608
4609                if (adev->gfx.mec.mqd_backup[mqd_idx])
4610                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4611        }
4612
4613        return 0;
4614}
4615
4616static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4617{
4618        struct amdgpu_device *adev = ring->adev;
4619        struct vi_mqd *mqd = ring->mqd_ptr;
4620        int mqd_idx = ring - &adev->gfx.compute_ring[0];
4621
4622        if (!adev->in_gpu_reset && !adev->in_suspend) {
4623                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4624                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4625                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4626                mutex_lock(&adev->srbm_mutex);
4627                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4628                gfx_v8_0_mqd_init(ring);
4629                vi_srbm_select(adev, 0, 0, 0, 0);
4630                mutex_unlock(&adev->srbm_mutex);
4631
4632                if (adev->gfx.mec.mqd_backup[mqd_idx])
4633                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4634        } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4635                /* reset MQD to a clean status */
4636                if (adev->gfx.mec.mqd_backup[mqd_idx])
4637                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4638                /* reset ring buffer */
4639                ring->wptr = 0;
4640                amdgpu_ring_clear_ring(ring);
4641        } else {
4642                amdgpu_ring_clear_ring(ring);
4643        }
4644        return 0;
4645}
4646
4647static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4648{
4649        if (adev->asic_type > CHIP_TONGA) {
4650                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4651                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4652        }
4653        /* enable doorbells */
4654        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4655}
4656
4657static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4658{
4659        struct amdgpu_ring *ring;
4660        int r;
4661
4662        ring = &adev->gfx.kiq.ring;
4663
4664        r = amdgpu_bo_reserve(ring->mqd_obj, false);
4665        if (unlikely(r != 0))
4666                return r;
4667
4668        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4669        if (unlikely(r != 0))
4670                return r;
4671
4672        gfx_v8_0_kiq_init_queue(ring);
4673        amdgpu_bo_kunmap(ring->mqd_obj);
4674        ring->mqd_ptr = NULL;
4675        amdgpu_bo_unreserve(ring->mqd_obj);
4676        ring->sched.ready = true;
4677        return 0;
4678}
4679
4680static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4681{
4682        struct amdgpu_ring *ring = NULL;
4683        int r = 0, i;
4684
4685        gfx_v8_0_cp_compute_enable(adev, true);
4686
4687        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4688                ring = &adev->gfx.compute_ring[i];
4689
4690                r = amdgpu_bo_reserve(ring->mqd_obj, false);
4691                if (unlikely(r != 0))
4692                        goto done;
4693                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4694                if (!r) {
4695                        r = gfx_v8_0_kcq_init_queue(ring);
4696                        amdgpu_bo_kunmap(ring->mqd_obj);
4697                        ring->mqd_ptr = NULL;
4698                }
4699                amdgpu_bo_unreserve(ring->mqd_obj);
4700                if (r)
4701                        goto done;
4702        }
4703
4704        gfx_v8_0_set_mec_doorbell_range(adev);
4705
4706        r = gfx_v8_0_kiq_kcq_enable(adev);
4707        if (r)
4708                goto done;
4709
4710done:
4711        return r;
4712}
4713
4714static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4715{
4716        int r, i;
4717        struct amdgpu_ring *ring;
4718
4719        /* collect all the ring_tests here, gfx, kiq, compute */
4720        ring = &adev->gfx.gfx_ring[0];
4721        r = amdgpu_ring_test_helper(ring);
4722        if (r)
4723                return r;
4724
4725        ring = &adev->gfx.kiq.ring;
4726        r = amdgpu_ring_test_helper(ring);
4727        if (r)
4728                return r;
4729
4730        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4731                ring = &adev->gfx.compute_ring[i];
4732                amdgpu_ring_test_helper(ring);
4733        }
4734
4735        return 0;
4736}
4737
4738static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4739{
4740        int r;
4741
4742        if (!(adev->flags & AMD_IS_APU))
4743                gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4744
4745        r = gfx_v8_0_kiq_resume(adev);
4746        if (r)
4747                return r;
4748
4749        r = gfx_v8_0_cp_gfx_resume(adev);
4750        if (r)
4751                return r;
4752
4753        r = gfx_v8_0_kcq_resume(adev);
4754        if (r)
4755                return r;
4756
4757        r = gfx_v8_0_cp_test_all_rings(adev);
4758        if (r)
4759                return r;
4760
4761        gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4762
4763        return 0;
4764}
4765
/*
 * Enable or disable both halves of the CP: the gfx engines first, then the
 * compute engines, each with the same @enable value.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
4771
4772static int gfx_v8_0_hw_init(void *handle)
4773{
4774        int r;
4775        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4776
4777        gfx_v8_0_init_golden_registers(adev);
4778        gfx_v8_0_constants_init(adev);
4779
4780        r = adev->gfx.rlc.funcs->resume(adev);
4781        if (r)
4782                return r;
4783
4784        r = gfx_v8_0_cp_resume(adev);
4785
4786        return r;
4787}
4788
4789static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4790{
4791        int r, i;
4792        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4793
4794        r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4795        if (r)
4796                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4797
4798        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4799                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4800
4801                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4802                amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4803                                                PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4804                                                PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4805                                                PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4806                                                PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4807                amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4808                amdgpu_ring_write(kiq_ring, 0);
4809                amdgpu_ring_write(kiq_ring, 0);
4810                amdgpu_ring_write(kiq_ring, 0);
4811        }
4812        r = amdgpu_ring_test_helper(kiq_ring);
4813        if (r)
4814                DRM_ERROR("KCQ disable failed\n");
4815
4816        return r;
4817}
4818
4819static bool gfx_v8_0_is_idle(void *handle)
4820{
4821        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4822
4823        if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4824                || RREG32(mmGRBM_STATUS2) != 0x8)
4825                return false;
4826        else
4827                return true;
4828}
4829
4830static bool gfx_v8_0_rlc_is_idle(void *handle)
4831{
4832        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4833
4834        if (RREG32(mmGRBM_STATUS2) != 0x8)
4835                return false;
4836        else
4837                return true;
4838}
4839
4840static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4841{
4842        unsigned int i;
4843        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4844
4845        for (i = 0; i < adev->usec_timeout; i++) {
4846                if (gfx_v8_0_rlc_is_idle(handle))
4847                        return 0;
4848
4849                udelay(1);
4850        }
4851        return -ETIMEDOUT;
4852}
4853
4854static int gfx_v8_0_wait_for_idle(void *handle)
4855{
4856        unsigned int i;
4857        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4858
4859        for (i = 0; i < adev->usec_timeout; i++) {
4860                if (gfx_v8_0_is_idle(handle))
4861                        return 0;
4862
4863                udelay(1);
4864        }
4865        return -ETIMEDOUT;
4866}
4867
4868static int gfx_v8_0_hw_fini(void *handle)
4869{
4870        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4871
4872        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4873        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4874
4875        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4876
4877        amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4878
4879        /* disable KCQ to avoid CPC touch memory not valid anymore */
4880        gfx_v8_0_kcq_disable(adev);
4881
4882        if (amdgpu_sriov_vf(adev)) {
4883                pr_debug("For SRIOV client, shouldn't do anything.\n");
4884                return 0;
4885        }
4886        amdgpu_gfx_rlc_enter_safe_mode(adev);
4887        if (!gfx_v8_0_wait_for_idle(adev))
4888                gfx_v8_0_cp_enable(adev, false);
4889        else
4890                pr_err("cp is busy, skip halt cp\n");
4891        if (!gfx_v8_0_wait_for_rlc_idle(adev))
4892                adev->gfx.rlc.funcs->stop(adev);
4893        else
4894                pr_err("rlc is busy, skip halt rlc\n");
4895        amdgpu_gfx_rlc_exit_safe_mode(adev);
4896        return 0;
4897}
4898
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
        int ret = gfx_v8_0_hw_fini(handle);

        return ret;
}
4903
/* Resume is implemented as a plain hardware (re)initialisation. */
static int gfx_v8_0_resume(void *handle)
{
        int ret = gfx_v8_0_hw_init(handle);

        return ret;
}
4908
4909static bool gfx_v8_0_check_soft_reset(void *handle)
4910{
4911        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4912        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4913        u32 tmp;
4914
4915        /* GRBM_STATUS */
4916        tmp = RREG32(mmGRBM_STATUS);
4917        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4918                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4919                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4920                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4921                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4922                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4923                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4924                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4925                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4926                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4927                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4928                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4929                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4930        }
4931
4932        /* GRBM_STATUS2 */
4933        tmp = RREG32(mmGRBM_STATUS2);
4934        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4935                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4936                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4937
4938        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4939            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4940            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4941                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4942                                                SOFT_RESET_CPF, 1);
4943                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4944                                                SOFT_RESET_CPC, 1);
4945                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4946                                                SOFT_RESET_CPG, 1);
4947                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4948                                                SOFT_RESET_GRBM, 1);
4949        }
4950
4951        /* SRBM_STATUS */
4952        tmp = RREG32(mmSRBM_STATUS);
4953        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4954                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4955                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4956        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4957                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4958                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4959
4960        if (grbm_soft_reset || srbm_soft_reset) {
4961                adev->gfx.grbm_soft_reset = grbm_soft_reset;
4962                adev->gfx.srbm_soft_reset = srbm_soft_reset;
4963                return true;
4964        } else {
4965                adev->gfx.grbm_soft_reset = 0;
4966                adev->gfx.srbm_soft_reset = 0;
4967                return false;
4968        }
4969}
4970
4971static int gfx_v8_0_pre_soft_reset(void *handle)
4972{
4973        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4974        u32 grbm_soft_reset = 0;
4975
4976        if ((!adev->gfx.grbm_soft_reset) &&
4977            (!adev->gfx.srbm_soft_reset))
4978                return 0;
4979
4980        grbm_soft_reset = adev->gfx.grbm_soft_reset;
4981
4982        /* stop the rlc */
4983        adev->gfx.rlc.funcs->stop(adev);
4984
4985        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4986            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4987                /* Disable GFX parsing/prefetching */
4988                gfx_v8_0_cp_gfx_enable(adev, false);
4989
4990        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4991            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4992            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4993            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4994                int i;
4995
4996                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4997                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4998
4999                        mutex_lock(&adev->srbm_mutex);
5000                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5001                        gfx_v8_0_deactivate_hqd(adev, 2);
5002                        vi_srbm_select(adev, 0, 0, 0, 0);
5003                        mutex_unlock(&adev->srbm_mutex);
5004                }
5005                /* Disable MEC parsing/prefetching */
5006                gfx_v8_0_cp_compute_enable(adev, false);
5007        }
5008
5009       return 0;
5010}
5011
5012static int gfx_v8_0_soft_reset(void *handle)
5013{
5014        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5015        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5016        u32 tmp;
5017
5018        if ((!adev->gfx.grbm_soft_reset) &&
5019            (!adev->gfx.srbm_soft_reset))
5020                return 0;
5021
5022        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5023        srbm_soft_reset = adev->gfx.srbm_soft_reset;
5024
5025        if (grbm_soft_reset || srbm_soft_reset) {
5026                tmp = RREG32(mmGMCON_DEBUG);
5027                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5028                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5029                WREG32(mmGMCON_DEBUG, tmp);
5030                udelay(50);
5031        }
5032
5033        if (grbm_soft_reset) {
5034                tmp = RREG32(mmGRBM_SOFT_RESET);
5035                tmp |= grbm_soft_reset;
5036                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5037                WREG32(mmGRBM_SOFT_RESET, tmp);
5038                tmp = RREG32(mmGRBM_SOFT_RESET);
5039
5040                udelay(50);
5041
5042                tmp &= ~grbm_soft_reset;
5043                WREG32(mmGRBM_SOFT_RESET, tmp);
5044                tmp = RREG32(mmGRBM_SOFT_RESET);
5045        }
5046
5047        if (srbm_soft_reset) {
5048                tmp = RREG32(mmSRBM_SOFT_RESET);
5049                tmp |= srbm_soft_reset;
5050                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5051                WREG32(mmSRBM_SOFT_RESET, tmp);
5052                tmp = RREG32(mmSRBM_SOFT_RESET);
5053
5054                udelay(50);
5055
5056                tmp &= ~srbm_soft_reset;
5057                WREG32(mmSRBM_SOFT_RESET, tmp);
5058                tmp = RREG32(mmSRBM_SOFT_RESET);
5059        }
5060
5061        if (grbm_soft_reset || srbm_soft_reset) {
5062                tmp = RREG32(mmGMCON_DEBUG);
5063                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5064                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5065                WREG32(mmGMCON_DEBUG, tmp);
5066        }
5067
5068        /* Wait a little for things to settle down */
5069        udelay(50);
5070
5071        return 0;
5072}
5073
5074static int gfx_v8_0_post_soft_reset(void *handle)
5075{
5076        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5077        u32 grbm_soft_reset = 0;
5078
5079        if ((!adev->gfx.grbm_soft_reset) &&
5080            (!adev->gfx.srbm_soft_reset))
5081                return 0;
5082
5083        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5084
5085        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5086            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5087            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5088            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5089                int i;
5090
5091                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5092                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5093
5094                        mutex_lock(&adev->srbm_mutex);
5095                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5096                        gfx_v8_0_deactivate_hqd(adev, 2);
5097                        vi_srbm_select(adev, 0, 0, 0, 0);
5098                        mutex_unlock(&adev->srbm_mutex);
5099                }
5100                gfx_v8_0_kiq_resume(adev);
5101                gfx_v8_0_kcq_resume(adev);
5102        }
5103
5104        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5105            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5106                gfx_v8_0_cp_gfx_resume(adev);
5107
5108        gfx_v8_0_cp_test_all_rings(adev);
5109
5110        adev->gfx.rlc.funcs->start(adev);
5111
5112        return 0;
5113}
5114
5115/**
5116 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5117 *
5118 * @adev: amdgpu_device pointer
5119 *
5120 * Fetches a GPU clock counter snapshot.
5121 * Returns the 64 bit clock counter snapshot.
5122 */
5123static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5124{
5125        uint64_t clock;
5126
5127        mutex_lock(&adev->gfx.gpu_clock_mutex);
5128        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5129        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5130                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5131        mutex_unlock(&adev->gfx.gpu_clock_mutex);
5132        return clock;
5133}
5134
5135static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5136                                          uint32_t vmid,
5137                                          uint32_t gds_base, uint32_t gds_size,
5138                                          uint32_t gws_base, uint32_t gws_size,
5139                                          uint32_t oa_base, uint32_t oa_size)
5140{
5141        /* GDS Base */
5142        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5143        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5144                                WRITE_DATA_DST_SEL(0)));
5145        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5146        amdgpu_ring_write(ring, 0);
5147        amdgpu_ring_write(ring, gds_base);
5148
5149        /* GDS Size */
5150        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5151        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5152                                WRITE_DATA_DST_SEL(0)));
5153        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5154        amdgpu_ring_write(ring, 0);
5155        amdgpu_ring_write(ring, gds_size);
5156
5157        /* GWS */
5158        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5159        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5160                                WRITE_DATA_DST_SEL(0)));
5161        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5162        amdgpu_ring_write(ring, 0);
5163        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5164
5165        /* OA */
5166        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5167        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5168                                WRITE_DATA_DST_SEL(0)));
5169        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5170        amdgpu_ring_write(ring, 0);
5171        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5172}
5173
5174static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5175{
5176        WREG32(mmSQ_IND_INDEX,
5177                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5178                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5179                (address << SQ_IND_INDEX__INDEX__SHIFT) |
5180                (SQ_IND_INDEX__FORCE_READ_MASK));
5181        return RREG32(mmSQ_IND_DATA);
5182}
5183
5184static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5185                           uint32_t wave, uint32_t thread,
5186                           uint32_t regno, uint32_t num, uint32_t *out)
5187{
5188        WREG32(mmSQ_IND_INDEX,
5189                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5190                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5191                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5192                (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5193                (SQ_IND_INDEX__FORCE_READ_MASK) |
5194                (SQ_IND_INDEX__AUTO_INCR_MASK));
5195        while (num--)
5196                *(out++) = RREG32(mmSQ_IND_DATA);
5197}
5198
5199static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5200{
5201        /* type 0 wave data */
5202        dst[(*no_fields)++] = 0;
5203        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5204        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5205        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5206        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5207        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5208        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5209        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5210        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5211        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5212        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5213        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5214        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5215        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5216        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5217        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5218        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5219        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5220        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5221}
5222
5223static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5224                                     uint32_t wave, uint32_t start,
5225                                     uint32_t size, uint32_t *dst)
5226{
5227        wave_read_regs(
5228                adev, simd, wave, 0,
5229                start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5230}
5231
5232
5233static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5234        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5235        .select_se_sh = &gfx_v8_0_select_se_sh,
5236        .read_wave_data = &gfx_v8_0_read_wave_data,
5237        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5238        .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5239};
5240
5241static int gfx_v8_0_early_init(void *handle)
5242{
5243        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5244
5245        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5246        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5247        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5248        gfx_v8_0_set_ring_funcs(adev);
5249        gfx_v8_0_set_irq_funcs(adev);
5250        gfx_v8_0_set_gds_init(adev);
5251        gfx_v8_0_set_rlc_funcs(adev);
5252
5253        return 0;
5254}
5255
5256static int gfx_v8_0_late_init(void *handle)
5257{
5258        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5259        int r;
5260
5261        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5262        if (r)
5263                return r;
5264
5265        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5266        if (r)
5267                return r;
5268
5269        /* requires IBs so do in late init after IB pool is initialized */
5270        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5271        if (r)
5272                return r;
5273
5274        r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5275        if (r) {
5276                DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5277                return r;
5278        }
5279
5280        r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5281        if (r) {
5282                DRM_ERROR(
5283                        "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5284                        r);
5285                return r;
5286        }
5287
5288        return 0;
5289}
5290
5291static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5292                                                       bool enable)
5293{
5294        if (((adev->asic_type == CHIP_POLARIS11) ||
5295            (adev->asic_type == CHIP_POLARIS12) ||
5296            (adev->asic_type == CHIP_VEGAM)) &&
5297            adev->powerplay.pp_funcs->set_powergating_by_smu)
5298                /* Send msg to SMU via Powerplay */
5299                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5300
5301        WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5302}
5303
5304static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5305                                                        bool enable)
5306{
5307        WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5308}
5309
5310static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5311                bool enable)
5312{
5313        WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5314}
5315
5316static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5317                                          bool enable)
5318{
5319        WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5320}
5321
5322static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5323                                                bool enable)
5324{
5325        WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5326
5327        /* Read any GFX register to wake up GFX. */
5328        if (!enable)
5329                RREG32(mmDB_RENDER_CONTROL);
5330}
5331
5332static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5333                                          bool enable)
5334{
5335        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5336                cz_enable_gfx_cg_power_gating(adev, true);
5337                if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5338                        cz_enable_gfx_pipeline_power_gating(adev, true);
5339        } else {
5340                cz_enable_gfx_cg_power_gating(adev, false);
5341                cz_enable_gfx_pipeline_power_gating(adev, false);
5342        }
5343}
5344
5345static int gfx_v8_0_set_powergating_state(void *handle,
5346                                          enum amd_powergating_state state)
5347{
5348        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5349        bool enable = (state == AMD_PG_STATE_GATE);
5350
5351        if (amdgpu_sriov_vf(adev))
5352                return 0;
5353
5354        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5355                                AMD_PG_SUPPORT_RLC_SMU_HS |
5356                                AMD_PG_SUPPORT_CP |
5357                                AMD_PG_SUPPORT_GFX_DMG))
5358                amdgpu_gfx_rlc_enter_safe_mode(adev);
5359        switch (adev->asic_type) {
5360        case CHIP_CARRIZO:
5361        case CHIP_STONEY:
5362
5363                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5364                        cz_enable_sck_slow_down_on_power_up(adev, true);
5365                        cz_enable_sck_slow_down_on_power_down(adev, true);
5366                } else {
5367                        cz_enable_sck_slow_down_on_power_up(adev, false);
5368                        cz_enable_sck_slow_down_on_power_down(adev, false);
5369                }
5370                if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5371                        cz_enable_cp_power_gating(adev, true);
5372                else
5373                        cz_enable_cp_power_gating(adev, false);
5374
5375                cz_update_gfx_cg_power_gating(adev, enable);
5376
5377                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5378                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5379                else
5380                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5381
5382                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5383                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5384                else
5385                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5386                break;
5387        case CHIP_POLARIS11:
5388        case CHIP_POLARIS12:
5389        case CHIP_VEGAM:
5390                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5391                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5392                else
5393                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5394
5395                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5396                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5397                else
5398                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5399
5400                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5401                        polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5402                else
5403                        polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5404                break;
5405        default:
5406                break;
5407        }
5408        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5409                                AMD_PG_SUPPORT_RLC_SMU_HS |
5410                                AMD_PG_SUPPORT_CP |
5411                                AMD_PG_SUPPORT_GFX_DMG))
5412                amdgpu_gfx_rlc_exit_safe_mode(adev);
5413        return 0;
5414}
5415
5416static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5417{
5418        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5419        int data;
5420
5421        if (amdgpu_sriov_vf(adev))
5422                *flags = 0;
5423
5424        /* AMD_CG_SUPPORT_GFX_MGCG */
5425        data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5426        if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5427                *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5428
5429        /* AMD_CG_SUPPORT_GFX_CGLG */
5430        data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5431        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5432                *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5433
5434        /* AMD_CG_SUPPORT_GFX_CGLS */
5435        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5436                *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5437
5438        /* AMD_CG_SUPPORT_GFX_CGTS */
5439        data = RREG32(mmCGTS_SM_CTRL_REG);
5440        if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5441                *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5442
5443        /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5444        if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5445                *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5446
5447        /* AMD_CG_SUPPORT_GFX_RLC_LS */
5448        data = RREG32(mmRLC_MEM_SLP_CNTL);
5449        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5450                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5451
5452        /* AMD_CG_SUPPORT_GFX_CP_LS */
5453        data = RREG32(mmCP_MEM_SLP_CNTL);
5454        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5455                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5456}
5457
5458static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5459                                     uint32_t reg_addr, uint32_t cmd)
5460{
5461        uint32_t data;
5462
5463        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5464
5465        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5466        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5467
5468        data = RREG32(mmRLC_SERDES_WR_CTRL);
5469        if (adev->asic_type == CHIP_STONEY)
5470                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5471                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5472                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5473                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5474                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5475                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5476                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5477                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5478                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5479        else
5480                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5481                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5482                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5483                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5484                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5485                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5486                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5487                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5488                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5489                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5490                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5491        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5492                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5493                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5494                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5495
5496        WREG32(mmRLC_SERDES_WR_CTRL, data);
5497}
5498
/*
 * Enter/exit message codes plus REQ and MESSAGE field masks/shifts for
 * RLC_GPR_REG2: REQ is bit 0, MESSAGE is bits 1-4.
 * NOTE(review): presumably part of an RLC safe-mode handshake through
 * RLC_GPR_REG2; none of the safe-mode code visible in this section uses
 * these definitions - confirm against the rest of the file.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5505
5506static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5507{
5508        uint32_t rlc_setting;
5509
5510        rlc_setting = RREG32(mmRLC_CNTL);
5511        if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5512                return false;
5513
5514        return true;
5515}
5516
5517static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5518{
5519        uint32_t data;
5520        unsigned i;
5521        data = RREG32(mmRLC_CNTL);
5522        data |= RLC_SAFE_MODE__CMD_MASK;
5523        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5524        data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5525        WREG32(mmRLC_SAFE_MODE, data);
5526
5527        /* wait for RLC_SAFE_MODE */
5528        for (i = 0; i < adev->usec_timeout; i++) {
5529                if ((RREG32(mmRLC_GPM_STAT) &
5530                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5531                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5532                    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5533                     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5534                        break;
5535                udelay(1);
5536        }
5537        for (i = 0; i < adev->usec_timeout; i++) {
5538                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5539                        break;
5540                udelay(1);
5541        }
5542}
5543
5544static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5545{
5546        uint32_t data;
5547        unsigned i;
5548
5549        data = RREG32(mmRLC_CNTL);
5550        data |= RLC_SAFE_MODE__CMD_MASK;
5551        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5552        WREG32(mmRLC_SAFE_MODE, data);
5553
5554        for (i = 0; i < adev->usec_timeout; i++) {
5555                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5556                        break;
5557                udelay(1);
5558        }
5559}
5560
5561static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5562        .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5563        .set_safe_mode = gfx_v8_0_set_safe_mode,
5564        .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5565        .init = gfx_v8_0_rlc_init,
5566        .get_csb_size = gfx_v8_0_get_csb_size,
5567        .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5568        .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5569        .resume = gfx_v8_0_rlc_resume,
5570        .stop = gfx_v8_0_rlc_stop,
5571        .reset = gfx_v8_0_rlc_reset,
5572        .start = gfx_v8_0_rlc_start
5573};
5574
5575static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5576                                                      bool enable)
5577{
5578        uint32_t temp, data;
5579
5580        amdgpu_gfx_rlc_enter_safe_mode(adev);
5581
5582        /* It is disabled by HW by default */
5583        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5584                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5585                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5586                                /* 1 - RLC memory Light sleep */
5587                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5588
5589                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5590                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5591                }
5592
5593                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5594                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5595                if (adev->flags & AMD_IS_APU)
5596                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5597                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5598                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5599                else
5600                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5601                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5602                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5603                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5604
5605                if (temp != data)
5606                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5607
5608                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5609                gfx_v8_0_wait_for_rlc_serdes(adev);
5610
5611                /* 5 - clear mgcg override */
5612                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5613
5614                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5615                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5616                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5617                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5618                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5619                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5620                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5621                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5622                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5623                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5624                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5625                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5626                        if (temp != data)
5627                                WREG32(mmCGTS_SM_CTRL_REG, data);
5628                }
5629                udelay(50);
5630
5631                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5632                gfx_v8_0_wait_for_rlc_serdes(adev);
5633        } else {
5634                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5635                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5636                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5637                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5638                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5639                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5640                if (temp != data)
5641                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5642
5643                /* 2 - disable MGLS in RLC */
5644                data = RREG32(mmRLC_MEM_SLP_CNTL);
5645                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5646                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5647                        WREG32(mmRLC_MEM_SLP_CNTL, data);
5648                }
5649
5650                /* 3 - disable MGLS in CP */
5651                data = RREG32(mmCP_MEM_SLP_CNTL);
5652                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5653                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5654                        WREG32(mmCP_MEM_SLP_CNTL, data);
5655                }
5656
5657                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5658                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5659                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5660                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5661                if (temp != data)
5662                        WREG32(mmCGTS_SM_CTRL_REG, data);
5663
5664                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5665                gfx_v8_0_wait_for_rlc_serdes(adev);
5666
5667                /* 6 - set mgcg override */
5668                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5669
5670                udelay(50);
5671
5672                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5673                gfx_v8_0_wait_for_rlc_serdes(adev);
5674        }
5675
5676        amdgpu_gfx_rlc_exit_safe_mode(adev);
5677}
5678
5679static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5680                                                      bool enable)
5681{
5682        uint32_t temp, temp1, data, data1;
5683
5684        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5685
5686        amdgpu_gfx_rlc_enter_safe_mode(adev);
5687
5688        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5689                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5690                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5691                if (temp1 != data1)
5692                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5693
5694                /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5695                gfx_v8_0_wait_for_rlc_serdes(adev);
5696
5697                /* 2 - clear cgcg override */
5698                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5699
5700                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5701                gfx_v8_0_wait_for_rlc_serdes(adev);
5702
5703                /* 3 - write cmd to set CGLS */
5704                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5705
5706                /* 4 - enable cgcg */
5707                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5708
5709                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5710                        /* enable cgls*/
5711                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5712
5713                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5714                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5715
5716                        if (temp1 != data1)
5717                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5718                } else {
5719                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5720                }
5721
5722                if (temp != data)
5723                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5724
5725                /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5726                 * Cmp_busy/GFX_Idle interrupts
5727                 */
5728                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5729        } else {
5730                /* disable cntx_empty_int_enable & GFX Idle interrupt */
5731                gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5732
5733                /* TEST CGCG */
5734                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5735                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5736                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5737                if (temp1 != data1)
5738                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5739
5740                /* read gfx register to wake up cgcg */
5741                RREG32(mmCB_CGTT_SCLK_CTRL);
5742                RREG32(mmCB_CGTT_SCLK_CTRL);
5743                RREG32(mmCB_CGTT_SCLK_CTRL);
5744                RREG32(mmCB_CGTT_SCLK_CTRL);
5745
5746                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5747                gfx_v8_0_wait_for_rlc_serdes(adev);
5748
5749                /* write cmd to Set CGCG Overrride */
5750                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5751
5752                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5753                gfx_v8_0_wait_for_rlc_serdes(adev);
5754
5755                /* write cmd to Clear CGLS */
5756                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5757
5758                /* disable cgcg, cgls should be disabled too. */
5759                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5760                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5761                if (temp != data)
5762                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5763                /* enable interrupts again for PG */
5764                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5765        }
5766
5767        gfx_v8_0_wait_for_rlc_serdes(adev);
5768
5769        amdgpu_gfx_rlc_exit_safe_mode(adev);
5770}
5771static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5772                                            bool enable)
5773{
5774        if (enable) {
5775                /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5776                 * ===  MGCG + MGLS + TS(CG/LS) ===
5777                 */
5778                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5779                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5780        } else {
5781                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5782                 * ===  CGCG + CGLS ===
5783                 */
5784                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5785                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5786        }
5787        return 0;
5788}
5789
5790static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5791                                          enum amd_clockgating_state state)
5792{
5793        uint32_t msg_id, pp_state = 0;
5794        uint32_t pp_support_state = 0;
5795
5796        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5797                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5798                        pp_support_state = PP_STATE_SUPPORT_LS;
5799                        pp_state = PP_STATE_LS;
5800                }
5801                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5802                        pp_support_state |= PP_STATE_SUPPORT_CG;
5803                        pp_state |= PP_STATE_CG;
5804                }
5805                if (state == AMD_CG_STATE_UNGATE)
5806                        pp_state = 0;
5807
5808                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5809                                PP_BLOCK_GFX_CG,
5810                                pp_support_state,
5811                                pp_state);
5812                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5813                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5814        }
5815
5816        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5817                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5818                        pp_support_state = PP_STATE_SUPPORT_LS;
5819                        pp_state = PP_STATE_LS;
5820                }
5821
5822                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5823                        pp_support_state |= PP_STATE_SUPPORT_CG;
5824                        pp_state |= PP_STATE_CG;
5825                }
5826
5827                if (state == AMD_CG_STATE_UNGATE)
5828                        pp_state = 0;
5829
5830                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5831                                PP_BLOCK_GFX_MG,
5832                                pp_support_state,
5833                                pp_state);
5834                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5835                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5836        }
5837
5838        return 0;
5839}
5840
5841static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5842                                          enum amd_clockgating_state state)
5843{
5844
5845        uint32_t msg_id, pp_state = 0;
5846        uint32_t pp_support_state = 0;
5847
5848        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5849                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5850                        pp_support_state = PP_STATE_SUPPORT_LS;
5851                        pp_state = PP_STATE_LS;
5852                }
5853                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5854                        pp_support_state |= PP_STATE_SUPPORT_CG;
5855                        pp_state |= PP_STATE_CG;
5856                }
5857                if (state == AMD_CG_STATE_UNGATE)
5858                        pp_state = 0;
5859
5860                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5861                                PP_BLOCK_GFX_CG,
5862                                pp_support_state,
5863                                pp_state);
5864                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5865                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5866        }
5867
5868        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5869                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5870                        pp_support_state = PP_STATE_SUPPORT_LS;
5871                        pp_state = PP_STATE_LS;
5872                }
5873                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5874                        pp_support_state |= PP_STATE_SUPPORT_CG;
5875                        pp_state |= PP_STATE_CG;
5876                }
5877                if (state == AMD_CG_STATE_UNGATE)
5878                        pp_state = 0;
5879
5880                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5881                                PP_BLOCK_GFX_3D,
5882                                pp_support_state,
5883                                pp_state);
5884                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5885                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5886        }
5887
5888        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5889                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5890                        pp_support_state = PP_STATE_SUPPORT_LS;
5891                        pp_state = PP_STATE_LS;
5892                }
5893
5894                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5895                        pp_support_state |= PP_STATE_SUPPORT_CG;
5896                        pp_state |= PP_STATE_CG;
5897                }
5898
5899                if (state == AMD_CG_STATE_UNGATE)
5900                        pp_state = 0;
5901
5902                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5903                                PP_BLOCK_GFX_MG,
5904                                pp_support_state,
5905                                pp_state);
5906                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5907                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5908        }
5909
5910        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5911                pp_support_state = PP_STATE_SUPPORT_LS;
5912
5913                if (state == AMD_CG_STATE_UNGATE)
5914                        pp_state = 0;
5915                else
5916                        pp_state = PP_STATE_LS;
5917
5918                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5919                                PP_BLOCK_GFX_RLC,
5920                                pp_support_state,
5921                                pp_state);
5922                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5923                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5924        }
5925
5926        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5927                pp_support_state = PP_STATE_SUPPORT_LS;
5928
5929                if (state == AMD_CG_STATE_UNGATE)
5930                        pp_state = 0;
5931                else
5932                        pp_state = PP_STATE_LS;
5933                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5934                        PP_BLOCK_GFX_CP,
5935                        pp_support_state,
5936                        pp_state);
5937                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5938                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5939        }
5940
5941        return 0;
5942}
5943
5944static int gfx_v8_0_set_clockgating_state(void *handle,
5945                                          enum amd_clockgating_state state)
5946{
5947        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5948
5949        if (amdgpu_sriov_vf(adev))
5950                return 0;
5951
5952        switch (adev->asic_type) {
5953        case CHIP_FIJI:
5954        case CHIP_CARRIZO:
5955        case CHIP_STONEY:
5956                gfx_v8_0_update_gfx_clock_gating(adev,
5957                                                 state == AMD_CG_STATE_GATE);
5958                break;
5959        case CHIP_TONGA:
5960                gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5961                break;
5962        case CHIP_POLARIS10:
5963        case CHIP_POLARIS11:
5964        case CHIP_POLARIS12:
5965        case CHIP_VEGAM:
5966                gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5967                break;
5968        default:
5969                break;
5970        }
5971        return 0;
5972}
5973
5974static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5975{
5976        return ring->adev->wb.wb[ring->rptr_offs];
5977}
5978
5979static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5980{
5981        struct amdgpu_device *adev = ring->adev;
5982
5983        if (ring->use_doorbell)
5984                /* XXX check if swapping is necessary on BE */
5985                return ring->adev->wb.wb[ring->wptr_offs];
5986        else
5987                return RREG32(mmCP_RB0_WPTR);
5988}
5989
5990static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5991{
5992        struct amdgpu_device *adev = ring->adev;
5993
5994        if (ring->use_doorbell) {
5995                /* XXX check if swapping is necessary on BE */
5996                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
5997                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
5998        } else {
5999                WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6000                (void)RREG32(mmCP_RB0_WPTR);
6001        }
6002}
6003
6004static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6005{
6006        u32 ref_and_mask, reg_mem_engine;
6007
6008        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6009            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6010                switch (ring->me) {
6011                case 1:
6012                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6013                        break;
6014                case 2:
6015                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6016                        break;
6017                default:
6018                        return;
6019                }
6020                reg_mem_engine = 0;
6021        } else {
6022                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6023                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6024        }
6025
6026        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6027        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6028                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6029                                 reg_mem_engine));
6030        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6031        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6032        amdgpu_ring_write(ring, ref_and_mask);
6033        amdgpu_ring_write(ring, ref_and_mask);
6034        amdgpu_ring_write(ring, 0x20); /* poll interval */
6035}
6036
6037static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6038{
6039        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6040        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6041                EVENT_INDEX(4));
6042
6043        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6044        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6045                EVENT_INDEX(0));
6046}
6047
6048static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6049                                        struct amdgpu_job *job,
6050                                        struct amdgpu_ib *ib,
6051                                        uint32_t flags)
6052{
6053        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6054        u32 header, control = 0;
6055
6056        if (ib->flags & AMDGPU_IB_FLAG_CE)
6057                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6058        else
6059                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6060
6061        control |= ib->length_dw | (vmid << 24);
6062
6063        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6064                control |= INDIRECT_BUFFER_PRE_ENB(1);
6065
6066                if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6067                        gfx_v8_0_ring_emit_de_meta(ring);
6068        }
6069
6070        amdgpu_ring_write(ring, header);
6071        amdgpu_ring_write(ring,
6072#ifdef __BIG_ENDIAN
6073                          (2 << 0) |
6074#endif
6075                          (ib->gpu_addr & 0xFFFFFFFC));
6076        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6077        amdgpu_ring_write(ring, control);
6078}
6079
6080static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6081                                          struct amdgpu_job *job,
6082                                          struct amdgpu_ib *ib,
6083                                          uint32_t flags)
6084{
6085        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6086        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6087
6088        /* Currently, there is a high possibility to get wave ID mismatch
6089         * between ME and GDS, leading to a hw deadlock, because ME generates
6090         * different wave IDs than the GDS expects. This situation happens
6091         * randomly when at least 5 compute pipes use GDS ordered append.
6092         * The wave IDs generated by ME are also wrong after suspend/resume.
6093         * Those are probably bugs somewhere else in the kernel driver.
6094         *
6095         * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6096         * GDS to 0 for this ring (me/pipe).
6097         */
6098        if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6099                amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6100                amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6101                amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6102        }
6103
6104        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6105        amdgpu_ring_write(ring,
6106#ifdef __BIG_ENDIAN
6107                                (2 << 0) |
6108#endif
6109                                (ib->gpu_addr & 0xFFFFFFFC));
6110        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6111        amdgpu_ring_write(ring, control);
6112}
6113
6114static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6115                                         u64 seq, unsigned flags)
6116{
6117        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6118        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6119
6120        /* EVENT_WRITE_EOP - flush caches, send int */
6121        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6122        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6123                                 EOP_TC_ACTION_EN |
6124                                 EOP_TC_WB_ACTION_EN |
6125                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6126                                 EVENT_INDEX(5)));
6127        amdgpu_ring_write(ring, addr & 0xfffffffc);
6128        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6129                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6130        amdgpu_ring_write(ring, lower_32_bits(seq));
6131        amdgpu_ring_write(ring, upper_32_bits(seq));
6132
6133}
6134
6135static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6136{
6137        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6138        uint32_t seq = ring->fence_drv.sync_seq;
6139        uint64_t addr = ring->fence_drv.gpu_addr;
6140
6141        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6142        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6143                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6144                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6145        amdgpu_ring_write(ring, addr & 0xfffffffc);
6146        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6147        amdgpu_ring_write(ring, seq);
6148        amdgpu_ring_write(ring, 0xffffffff);
6149        amdgpu_ring_write(ring, 4); /* poll interval */
6150}
6151
6152static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6153                                        unsigned vmid, uint64_t pd_addr)
6154{
6155        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6156
6157        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6158
6159        /* wait for the invalidate to complete */
6160        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6161        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6162                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6163                                 WAIT_REG_MEM_ENGINE(0))); /* me */
6164        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6165        amdgpu_ring_write(ring, 0);
6166        amdgpu_ring_write(ring, 0); /* ref */
6167        amdgpu_ring_write(ring, 0); /* mask */
6168        amdgpu_ring_write(ring, 0x20); /* poll interval */
6169
6170        /* compute doesn't have PFP */
6171        if (usepfp) {
6172                /* sync PFP to ME, otherwise we might get invalid PFP reads */
6173                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6174                amdgpu_ring_write(ring, 0x0);
6175        }
6176}
6177
6178static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6179{
6180        return ring->adev->wb.wb[ring->wptr_offs];
6181}
6182
6183static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6184{
6185        struct amdgpu_device *adev = ring->adev;
6186
6187        /* XXX check if swapping is necessary on BE */
6188        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6189        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6190}
6191
6192static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6193                                           bool acquire)
6194{
6195        struct amdgpu_device *adev = ring->adev;
6196        int pipe_num, tmp, reg;
6197        int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6198
6199        pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6200
6201        /* first me only has 2 entries, GFX and HP3D */
6202        if (ring->me > 0)
6203                pipe_num -= 2;
6204
6205        reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6206        tmp = RREG32(reg);
6207        tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6208        WREG32(reg, tmp);
6209}
6210
6211static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6212                                            struct amdgpu_ring *ring,
6213                                            bool acquire)
6214{
6215        int i, pipe;
6216        bool reserve;
6217        struct amdgpu_ring *iring;
6218
6219        mutex_lock(&adev->gfx.pipe_reserve_mutex);
6220        pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6221        if (acquire)
6222                set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6223        else
6224                clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6225
6226        if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6227                /* Clear all reservations - everyone reacquires all resources */
6228                for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6229                        gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6230                                                       true);
6231
6232                for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6233                        gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6234                                                       true);
6235        } else {
6236                /* Lower all pipes without a current reservation */
6237                for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6238                        iring = &adev->gfx.gfx_ring[i];
6239                        pipe = amdgpu_gfx_queue_to_bit(adev,
6240                                                       iring->me,
6241                                                       iring->pipe,
6242                                                       0);
6243                        reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6244                        gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6245                }
6246
6247                for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6248                        iring = &adev->gfx.compute_ring[i];
6249                        pipe = amdgpu_gfx_queue_to_bit(adev,
6250                                                       iring->me,
6251                                                       iring->pipe,
6252                                                       0);
6253                        reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6254                        gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6255                }
6256        }
6257
6258        mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6259}
6260
6261static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6262                                      struct amdgpu_ring *ring,
6263                                      bool acquire)
6264{
6265        uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6266        uint32_t queue_priority = acquire ? 0xf : 0x0;
6267
6268        mutex_lock(&adev->srbm_mutex);
6269        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6270
6271        WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6272        WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6273
6274        vi_srbm_select(adev, 0, 0, 0, 0);
6275        mutex_unlock(&adev->srbm_mutex);
6276}
6277static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6278                                               enum drm_sched_priority priority)
6279{
6280        struct amdgpu_device *adev = ring->adev;
6281        bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6282
6283        if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6284                return;
6285
6286        gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6287        gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6288}
6289
6290static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6291                                             u64 addr, u64 seq,
6292                                             unsigned flags)
6293{
6294        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6295        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6296
6297        /* RELEASE_MEM - flush caches, send int */
6298        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6299        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6300                                 EOP_TC_ACTION_EN |
6301                                 EOP_TC_WB_ACTION_EN |
6302                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6303                                 EVENT_INDEX(5)));
6304        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6305        amdgpu_ring_write(ring, addr & 0xfffffffc);
6306        amdgpu_ring_write(ring, upper_32_bits(addr));
6307        amdgpu_ring_write(ring, lower_32_bits(seq));
6308        amdgpu_ring_write(ring, upper_32_bits(seq));
6309}
6310
6311static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6312                                         u64 seq, unsigned int flags)
6313{
6314        /* we only allocate 32bit for each seq wb address */
6315        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6316
6317        /* write fence seq to the "addr" */
6318        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6319        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6320                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6321        amdgpu_ring_write(ring, lower_32_bits(addr));
6322        amdgpu_ring_write(ring, upper_32_bits(addr));
6323        amdgpu_ring_write(ring, lower_32_bits(seq));
6324
6325        if (flags & AMDGPU_FENCE_FLAG_INT) {
6326                /* set register to trigger INT */
6327                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6328                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6329                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6330                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6331                amdgpu_ring_write(ring, 0);
6332                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6333        }
6334}
6335
6336static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6337{
6338        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6339        amdgpu_ring_write(ring, 0);
6340}
6341
6342static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6343{
6344        uint32_t dw2 = 0;
6345
6346        if (amdgpu_sriov_vf(ring->adev))
6347                gfx_v8_0_ring_emit_ce_meta(ring);
6348
6349        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6350        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6351                gfx_v8_0_ring_emit_vgt_flush(ring);
6352                /* set load_global_config & load_global_uconfig */
6353                dw2 |= 0x8001;
6354                /* set load_cs_sh_regs */
6355                dw2 |= 0x01000000;
6356                /* set load_per_context_state & load_gfx_sh_regs for GFX */
6357                dw2 |= 0x10002;
6358
6359                /* set load_ce_ram if preamble presented */
6360                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6361                        dw2 |= 0x10000000;
6362        } else {
6363                /* still load_ce_ram if this is the first time preamble presented
6364                 * although there is no context switch happens.
6365                 */
6366                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6367                        dw2 |= 0x10000000;
6368        }
6369
6370        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6371        amdgpu_ring_write(ring, dw2);
6372        amdgpu_ring_write(ring, 0);
6373}
6374
6375static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6376{
6377        unsigned ret;
6378
6379        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6380        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6381        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6382        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6383        ret = ring->wptr & ring->buf_mask;
6384        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6385        return ret;
6386}
6387
6388static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6389{
6390        unsigned cur;
6391
6392        BUG_ON(offset > ring->buf_mask);
6393        BUG_ON(ring->ring[offset] != 0x55aa55aa);
6394
6395        cur = (ring->wptr & ring->buf_mask) - 1;
6396        if (likely(cur > offset))
6397                ring->ring[offset] = cur - offset;
6398        else
6399                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6400}
6401
6402static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6403{
6404        struct amdgpu_device *adev = ring->adev;
6405
6406        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6407        amdgpu_ring_write(ring, 0 |     /* src: register*/
6408                                (5 << 8) |      /* dst: memory */
6409                                (1 << 20));     /* write confirm */
6410        amdgpu_ring_write(ring, reg);
6411        amdgpu_ring_write(ring, 0);
6412        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6413                                adev->virt.reg_val_offs * 4));
6414        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6415                                adev->virt.reg_val_offs * 4));
6416}
6417
6418static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6419                                  uint32_t val)
6420{
6421        uint32_t cmd;
6422
6423        switch (ring->funcs->type) {
6424        case AMDGPU_RING_TYPE_GFX:
6425                cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6426                break;
6427        case AMDGPU_RING_TYPE_KIQ:
6428                cmd = 1 << 16; /* no inc addr */
6429                break;
6430        default:
6431                cmd = WR_CONFIRM;
6432                break;
6433        }
6434
6435        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6436        amdgpu_ring_write(ring, cmd);
6437        amdgpu_ring_write(ring, reg);
6438        amdgpu_ring_write(ring, 0);
6439        amdgpu_ring_write(ring, val);
6440}
6441
6442static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6443{
6444        struct amdgpu_device *adev = ring->adev;
6445        uint32_t value = 0;
6446
6447        value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6448        value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6449        value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6450        value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6451        WREG32(mmSQ_CMD, value);
6452}
6453
6454static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6455                                                 enum amdgpu_interrupt_state state)
6456{
6457        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6458                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6459}
6460
6461static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6462                                                     int me, int pipe,
6463                                                     enum amdgpu_interrupt_state state)
6464{
6465        u32 mec_int_cntl, mec_int_cntl_reg;
6466
6467        /*
6468         * amdgpu controls only the first MEC. That's why this function only
6469         * handles the setting of interrupts for this specific MEC. All other
6470         * pipes' interrupts are set by amdkfd.
6471         */
6472
6473        if (me == 1) {
6474                switch (pipe) {
6475                case 0:
6476                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6477                        break;
6478                case 1:
6479                        mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6480                        break;
6481                case 2:
6482                        mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6483                        break;
6484                case 3:
6485                        mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6486                        break;
6487                default:
6488                        DRM_DEBUG("invalid pipe %d\n", pipe);
6489                        return;
6490                }
6491        } else {
6492                DRM_DEBUG("invalid me %d\n", me);
6493                return;
6494        }
6495
6496        switch (state) {
6497        case AMDGPU_IRQ_STATE_DISABLE:
6498                mec_int_cntl = RREG32(mec_int_cntl_reg);
6499                mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6500                WREG32(mec_int_cntl_reg, mec_int_cntl);
6501                break;
6502        case AMDGPU_IRQ_STATE_ENABLE:
6503                mec_int_cntl = RREG32(mec_int_cntl_reg);
6504                mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6505                WREG32(mec_int_cntl_reg, mec_int_cntl);
6506                break;
6507        default:
6508                break;
6509        }
6510}
6511
6512static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6513                                             struct amdgpu_irq_src *source,
6514                                             unsigned type,
6515                                             enum amdgpu_interrupt_state state)
6516{
6517        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6518                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6519
6520        return 0;
6521}
6522
6523static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6524                                              struct amdgpu_irq_src *source,
6525                                              unsigned type,
6526                                              enum amdgpu_interrupt_state state)
6527{
6528        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6529                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6530
6531        return 0;
6532}
6533
6534static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6535                                            struct amdgpu_irq_src *src,
6536                                            unsigned type,
6537                                            enum amdgpu_interrupt_state state)
6538{
6539        switch (type) {
6540        case AMDGPU_CP_IRQ_GFX_EOP:
6541                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6542                break;
6543        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6544                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6545                break;
6546        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6547                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6548                break;
6549        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6550                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6551                break;
6552        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6553                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6554                break;
6555        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6556                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6557                break;
6558        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6559                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6560                break;
6561        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6562                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6563                break;
6564        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6565                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6566                break;
6567        default:
6568                break;
6569        }
6570        return 0;
6571}
6572
6573static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6574                                         struct amdgpu_irq_src *source,
6575                                         unsigned int type,
6576                                         enum amdgpu_interrupt_state state)
6577{
6578        int enable_flag;
6579
6580        switch (state) {
6581        case AMDGPU_IRQ_STATE_DISABLE:
6582                enable_flag = 0;
6583                break;
6584
6585        case AMDGPU_IRQ_STATE_ENABLE:
6586                enable_flag = 1;
6587                break;
6588
6589        default:
6590                return -EINVAL;
6591        }
6592
6593        WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6594        WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6595        WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6596        WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6597        WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6598        WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6599                     enable_flag);
6600        WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6601                     enable_flag);
6602        WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6603                     enable_flag);
6604        WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6605                     enable_flag);
6606        WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6607                     enable_flag);
6608        WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6609                     enable_flag);
6610        WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6611                     enable_flag);
6612        WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6613                     enable_flag);
6614
6615        return 0;
6616}
6617
6618static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6619                                     struct amdgpu_irq_src *source,
6620                                     unsigned int type,
6621                                     enum amdgpu_interrupt_state state)
6622{
6623        int enable_flag;
6624
6625        switch (state) {
6626        case AMDGPU_IRQ_STATE_DISABLE:
6627                enable_flag = 1;
6628                break;
6629
6630        case AMDGPU_IRQ_STATE_ENABLE:
6631                enable_flag = 0;
6632                break;
6633
6634        default:
6635                return -EINVAL;
6636        }
6637
6638        WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6639                     enable_flag);
6640
6641        return 0;
6642}
6643
6644static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6645                            struct amdgpu_irq_src *source,
6646                            struct amdgpu_iv_entry *entry)
6647{
6648        int i;
6649        u8 me_id, pipe_id, queue_id;
6650        struct amdgpu_ring *ring;
6651
6652        DRM_DEBUG("IH: CP EOP\n");
6653        me_id = (entry->ring_id & 0x0c) >> 2;
6654        pipe_id = (entry->ring_id & 0x03) >> 0;
6655        queue_id = (entry->ring_id & 0x70) >> 4;
6656
6657        switch (me_id) {
6658        case 0:
6659                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6660                break;
6661        case 1:
6662        case 2:
6663                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6664                        ring = &adev->gfx.compute_ring[i];
6665                        /* Per-queue interrupt is supported for MEC starting from VI.
6666                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6667                          */
6668                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6669                                amdgpu_fence_process(ring);
6670                }
6671                break;
6672        }
6673        return 0;
6674}
6675
6676static void gfx_v8_0_fault(struct amdgpu_device *adev,
6677                           struct amdgpu_iv_entry *entry)
6678{
6679        u8 me_id, pipe_id, queue_id;
6680        struct amdgpu_ring *ring;
6681        int i;
6682
6683        me_id = (entry->ring_id & 0x0c) >> 2;
6684        pipe_id = (entry->ring_id & 0x03) >> 0;
6685        queue_id = (entry->ring_id & 0x70) >> 4;
6686
6687        switch (me_id) {
6688        case 0:
6689                drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6690                break;
6691        case 1:
6692        case 2:
6693                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6694                        ring = &adev->gfx.compute_ring[i];
6695                        if (ring->me == me_id && ring->pipe == pipe_id &&
6696                            ring->queue == queue_id)
6697                                drm_sched_fault(&ring->sched);
6698                }
6699                break;
6700        }
6701}
6702
/*
 * irq_src .process callback for privileged register faults: log the error
 * and flag a fault on the ring identified by @entry.  @source is unused.
 * Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6711
/*
 * irq_src .process callback for privileged instruction faults: log the
 * error and flag a fault on the ring identified by @entry.  @source is
 * unused.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6720
/*
 * irq_src .process callback for CP EDC/ECC errors.
 *
 * Only logs the event; @adev, @source and @entry are unused.  Always
 * returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the message so it cannot run into the next log line */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6728
6729static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6730{
6731        u32 enc, se_id, sh_id, cu_id;
6732        char type[20];
6733        int sq_edc_source = -1;
6734
6735        enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6736        se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6737
6738        switch (enc) {
6739                case 0:
6740                        DRM_INFO("SQ general purpose intr detected:"
6741                                        "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6742                                        "host_cmd_overflow %d, cmd_timestamp %d,"
6743                                        "reg_timestamp %d, thread_trace_buff_full %d,"
6744                                        "wlt %d, thread_trace %d.\n",
6745                                        se_id,
6746                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6747                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6748                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6749                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6750                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6751                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6752                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6753                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6754                                        );
6755                        break;
6756                case 1:
6757                case 2:
6758
6759                        cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6760                        sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6761
6762                        /*
6763                         * This function can be called either directly from ISR
6764                         * or from BH in which case we can access SQ_EDC_INFO
6765                         * instance
6766                         */
6767                        if (in_task()) {
6768                                mutex_lock(&adev->grbm_idx_mutex);
6769                                gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6770
6771                                sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6772
6773                                gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6774                                mutex_unlock(&adev->grbm_idx_mutex);
6775                        }
6776
6777                        if (enc == 1)
6778                                sprintf(type, "instruction intr");
6779                        else
6780                                sprintf(type, "EDC/ECC error");
6781
6782                        DRM_INFO(
6783                                "SQ %s detected: "
6784                                        "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6785                                        "trap %s, sq_ed_info.source %s.\n",
6786                                        type, se_id, sh_id, cu_id,
6787                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6788                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6789                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6790                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6791                                        (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6792                                );
6793                        break;
6794                default:
6795                        DRM_ERROR("SQ invalid encoding type\n.");
6796        }
6797}
6798
6799static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6800{
6801
6802        struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6803        struct sq_work *sq_work = container_of(work, struct sq_work, work);
6804
6805        gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6806}
6807
6808static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6809                           struct amdgpu_irq_src *source,
6810                           struct amdgpu_iv_entry *entry)
6811{
6812        unsigned ih_data = entry->src_data[0];
6813
6814        /*
6815         * Try to submit work so SQ_EDC_INFO can be accessed from
6816         * BH. If previous work submission hasn't finished yet
6817         * just print whatever info is possible directly from the ISR.
6818         */
6819        if (work_pending(&adev->gfx.sq_work.work)) {
6820                gfx_v8_0_parse_sq_irq(adev, ih_data);
6821        } else {
6822                adev->gfx.sq_work.ih_data = ih_data;
6823                schedule_work(&adev->gfx.sq_work.work);
6824        }
6825
6826        return 0;
6827}
6828
6829static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6830        .name = "gfx_v8_0",
6831        .early_init = gfx_v8_0_early_init,
6832        .late_init = gfx_v8_0_late_init,
6833        .sw_init = gfx_v8_0_sw_init,
6834        .sw_fini = gfx_v8_0_sw_fini,
6835        .hw_init = gfx_v8_0_hw_init,
6836        .hw_fini = gfx_v8_0_hw_fini,
6837        .suspend = gfx_v8_0_suspend,
6838        .resume = gfx_v8_0_resume,
6839        .is_idle = gfx_v8_0_is_idle,
6840        .wait_for_idle = gfx_v8_0_wait_for_idle,
6841        .check_soft_reset = gfx_v8_0_check_soft_reset,
6842        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6843        .soft_reset = gfx_v8_0_soft_reset,
6844        .post_soft_reset = gfx_v8_0_post_soft_reset,
6845        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6846        .set_powergating_state = gfx_v8_0_set_powergating_state,
6847        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6848};
6849
6850static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6851        .type = AMDGPU_RING_TYPE_GFX,
6852        .align_mask = 0xff,
6853        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6854        .support_64bit_ptrs = false,
6855        .get_rptr = gfx_v8_0_ring_get_rptr,
6856        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6857        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6858        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6859                5 +  /* COND_EXEC */
6860                7 +  /* PIPELINE_SYNC */
6861                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6862                8 +  /* FENCE for VM_FLUSH */
6863                20 + /* GDS switch */
6864                4 + /* double SWITCH_BUFFER,
6865                       the first COND_EXEC jump to the place just
6866                           prior to this double SWITCH_BUFFER  */
6867                5 + /* COND_EXEC */
6868                7 +      /*     HDP_flush */
6869                4 +      /*     VGT_flush */
6870                14 + /* CE_META */
6871                31 + /* DE_META */
6872                3 + /* CNTX_CTRL */
6873                5 + /* HDP_INVL */
6874                8 + 8 + /* FENCE x2 */
6875                2, /* SWITCH_BUFFER */
6876        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6877        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6878        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6879        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6880        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6881        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6882        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6883        .test_ring = gfx_v8_0_ring_test_ring,
6884        .test_ib = gfx_v8_0_ring_test_ib,
6885        .insert_nop = amdgpu_ring_insert_nop,
6886        .pad_ib = amdgpu_ring_generic_pad_ib,
6887        .emit_switch_buffer = gfx_v8_ring_emit_sb,
6888        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6889        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6890        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6891        .emit_wreg = gfx_v8_0_ring_emit_wreg,
6892        .soft_recovery = gfx_v8_0_ring_soft_recovery,
6893};
6894
6895static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6896        .type = AMDGPU_RING_TYPE_COMPUTE,
6897        .align_mask = 0xff,
6898        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6899        .support_64bit_ptrs = false,
6900        .get_rptr = gfx_v8_0_ring_get_rptr,
6901        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6902        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6903        .emit_frame_size =
6904                20 + /* gfx_v8_0_ring_emit_gds_switch */
6905                7 + /* gfx_v8_0_ring_emit_hdp_flush */
6906                5 + /* hdp_invalidate */
6907                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6908                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6909                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6910        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6911        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6912        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6913        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6914        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6915        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6916        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6917        .test_ring = gfx_v8_0_ring_test_ring,
6918        .test_ib = gfx_v8_0_ring_test_ib,
6919        .insert_nop = amdgpu_ring_insert_nop,
6920        .pad_ib = amdgpu_ring_generic_pad_ib,
6921        .set_priority = gfx_v8_0_ring_set_priority_compute,
6922        .emit_wreg = gfx_v8_0_ring_emit_wreg,
6923};
6924
6925static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6926        .type = AMDGPU_RING_TYPE_KIQ,
6927        .align_mask = 0xff,
6928        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6929        .support_64bit_ptrs = false,
6930        .get_rptr = gfx_v8_0_ring_get_rptr,
6931        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6932        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6933        .emit_frame_size =
6934                20 + /* gfx_v8_0_ring_emit_gds_switch */
6935                7 + /* gfx_v8_0_ring_emit_hdp_flush */
6936                5 + /* hdp_invalidate */
6937                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6938                17 + /* gfx_v8_0_ring_emit_vm_flush */
6939                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6940        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6941        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6942        .test_ring = gfx_v8_0_ring_test_ring,
6943        .insert_nop = amdgpu_ring_insert_nop,
6944        .pad_ib = amdgpu_ring_generic_pad_ib,
6945        .emit_rreg = gfx_v8_0_ring_emit_rreg,
6946        .emit_wreg = gfx_v8_0_ring_emit_wreg,
6947};
6948
6949static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6950{
6951        int i;
6952
6953        adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6954
6955        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6956                adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6957
6958        for (i = 0; i < adev->gfx.num_compute_rings; i++)
6959                adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6960}
6961
6962static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6963        .set = gfx_v8_0_set_eop_interrupt_state,
6964        .process = gfx_v8_0_eop_irq,
6965};
6966
6967static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6968        .set = gfx_v8_0_set_priv_reg_fault_state,
6969        .process = gfx_v8_0_priv_reg_irq,
6970};
6971
6972static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6973        .set = gfx_v8_0_set_priv_inst_fault_state,
6974        .process = gfx_v8_0_priv_inst_irq,
6975};
6976
6977static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
6978        .set = gfx_v8_0_set_cp_ecc_int_state,
6979        .process = gfx_v8_0_cp_ecc_error_irq,
6980};
6981
6982static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
6983        .set = gfx_v8_0_set_sq_int_state,
6984        .process = gfx_v8_0_sq_irq,
6985};
6986
6987static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6988{
6989        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6990        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6991
6992        adev->gfx.priv_reg_irq.num_types = 1;
6993        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6994
6995        adev->gfx.priv_inst_irq.num_types = 1;
6996        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6997
6998        adev->gfx.cp_ecc_error_irq.num_types = 1;
6999        adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7000
7001        adev->gfx.sq_irq.num_types = 1;
7002        adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7003}
7004
7005static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7006{
7007        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7008}
7009
7010static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7011{
7012        /* init asci gds info */
7013        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7014        adev->gds.gws.total_size = 64;
7015        adev->gds.oa.total_size = 16;
7016        adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7017
7018        if (adev->gds.mem.total_size == 64 * 1024) {
7019                adev->gds.mem.gfx_partition_size = 4096;
7020                adev->gds.mem.cs_partition_size = 4096;
7021
7022                adev->gds.gws.gfx_partition_size = 4;
7023                adev->gds.gws.cs_partition_size = 4;
7024
7025                adev->gds.oa.gfx_partition_size = 4;
7026                adev->gds.oa.cs_partition_size = 1;
7027        } else {
7028                adev->gds.mem.gfx_partition_size = 1024;
7029                adev->gds.mem.cs_partition_size = 1024;
7030
7031                adev->gds.gws.gfx_partition_size = 16;
7032                adev->gds.gws.cs_partition_size = 16;
7033
7034                adev->gds.oa.gfx_partition_size = 4;
7035                adev->gds.oa.cs_partition_size = 4;
7036        }
7037}
7038
7039static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7040                                                 u32 bitmap)
7041{
7042        u32 data;
7043
7044        if (!bitmap)
7045                return;
7046
7047        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7048        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7049
7050        WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7051}
7052
7053static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7054{
7055        u32 data, mask;
7056
7057        data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7058                RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7059
7060        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7061
7062        return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7063}
7064
7065static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7066{
7067        int i, j, k, counter, active_cu_number = 0;
7068        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7069        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7070        unsigned disable_masks[4 * 2];
7071        u32 ao_cu_num;
7072
7073        memset(cu_info, 0, sizeof(*cu_info));
7074
7075        if (adev->flags & AMD_IS_APU)
7076                ao_cu_num = 2;
7077        else
7078                ao_cu_num = adev->gfx.config.max_cu_per_sh;
7079
7080        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7081
7082        mutex_lock(&adev->grbm_idx_mutex);
7083        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7084                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7085                        mask = 1;
7086                        ao_bitmap = 0;
7087                        counter = 0;
7088                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7089                        if (i < 4 && j < 2)
7090                                gfx_v8_0_set_user_cu_inactive_bitmap(
7091                                        adev, disable_masks[i * 2 + j]);
7092                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7093                        cu_info->bitmap[i][j] = bitmap;
7094
7095                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7096                                if (bitmap & mask) {
7097                                        if (counter < ao_cu_num)
7098                                                ao_bitmap |= mask;
7099                                        counter ++;
7100                                }
7101                                mask <<= 1;
7102                        }
7103                        active_cu_number += counter;
7104                        if (i < 2 && j < 2)
7105                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7106                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7107                }
7108        }
7109        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7110        mutex_unlock(&adev->grbm_idx_mutex);
7111
7112        cu_info->number = active_cu_number;
7113        cu_info->ao_cu_mask = ao_cu_mask;
7114        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7115        cu_info->max_waves_per_simd = 10;
7116        cu_info->max_scratch_slots_per_cu = 32;
7117        cu_info->wave_front_size = 64;
7118        cu_info->lds_size = 64;
7119}
7120
7121const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7122{
7123        .type = AMD_IP_BLOCK_TYPE_GFX,
7124        .major = 8,
7125        .minor = 0,
7126        .rev = 0,
7127        .funcs = &gfx_v8_0_ip_funcs,
7128};
7129
7130const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7131{
7132        .type = AMD_IP_BLOCK_TYPE_GFX,
7133        .major = 8,
7134        .minor = 1,
7135        .rev = 0,
7136        .funcs = &gfx_v8_0_ip_funcs,
7137};
7138
7139static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7140{
7141        uint64_t ce_payload_addr;
7142        int cnt_ce;
7143        union {
7144                struct vi_ce_ib_state regular;
7145                struct vi_ce_ib_state_chained_ib chained;
7146        } ce_payload = {};
7147
7148        if (ring->adev->virt.chained_ib_support) {
7149                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7150                        offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7151                cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7152        } else {
7153                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7154                        offsetof(struct vi_gfx_meta_data, ce_payload);
7155                cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7156        }
7157
7158        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7159        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7160                                WRITE_DATA_DST_SEL(8) |
7161                                WR_CONFIRM) |
7162                                WRITE_DATA_CACHE_POLICY(0));
7163        amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7164        amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7165        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7166}
7167
7168static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7169{
7170        uint64_t de_payload_addr, gds_addr, csa_addr;
7171        int cnt_de;
7172        union {
7173                struct vi_de_ib_state regular;
7174                struct vi_de_ib_state_chained_ib chained;
7175        } de_payload = {};
7176
7177        csa_addr = amdgpu_csa_vaddr(ring->adev);
7178        gds_addr = csa_addr + 4096;
7179        if (ring->adev->virt.chained_ib_support) {
7180                de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7181                de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7182                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7183                cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7184        } else {
7185                de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7186                de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7187                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7188                cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7189        }
7190
7191        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7192        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7193                                WRITE_DATA_DST_SEL(8) |
7194                                WR_CONFIRM) |
7195                                WRITE_DATA_CACHE_POLICY(0));
7196        amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7197        amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7198        amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7199}
7200