linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "amdgpu_ring.h"
  33#include "vi.h"
  34#include "vi_structs.h"
  35#include "vid.h"
  36#include "amdgpu_ucode.h"
  37#include "amdgpu_atombios.h"
  38#include "atombios_i2c.h"
  39#include "clearstate_vi.h"
  40
  41#include "gmc/gmc_8_2_d.h"
  42#include "gmc/gmc_8_2_sh_mask.h"
  43
  44#include "oss/oss_3_0_d.h"
  45#include "oss/oss_3_0_sh_mask.h"
  46
  47#include "bif/bif_5_0_d.h"
  48#include "bif/bif_5_0_sh_mask.h"
  49#include "gca/gfx_8_0_d.h"
  50#include "gca/gfx_8_0_enum.h"
  51#include "gca/gfx_8_0_sh_mask.h"
  52
  53#include "dce/dce_10_0_d.h"
  54#include "dce/dce_10_0_sh_mask.h"
  55
  56#include "smu/smu_7_1_3_d.h"
  57
  58#include "ivsrcid/ivsrcid_vislands30.h"
  59
   /*
    * Basic GFX8 sizing constants.
    * NOTE(review): GFX8_MEC_HPD_SIZE is presumably a size in bytes for a
    * per-MEC buffer; the unit and use are not visible in this part of the
    * file - confirm at the point of use.
    */
   60#define GFX8_NUM_GFX_RINGS     1
   61#define GFX8_MEC_HPD_SIZE 4096
   62
   /*
    * Per-ASIC GB_ADDR_CONFIG values. The same numbers appear as the
    * mmGB_ADDR_CONFIG entries of the *_golden_common_all tables in this file
    * (0x22010001 for iceland/cz, 0x22011002 for polaris11, 0x22011003 for
    * tonga).
    */
   63#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
   64#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
   65#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
   66#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  67
   /*
    * Field builders: each macro shifts x left by the *__SHIFT constant of the
    * named field. The first five target fields of GB_TILE_MODE0, the last four
    * target fields of GB_MACROTILE_MODE0. No masking is applied, so x must
    * already fit within the width of the destination field.
    */
   68#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
   69#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
   70#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
   71#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
   72#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
   73#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
   74#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
   75#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
   76#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  77
   /*
    * Single-bit masks, bit 0 through bit 5, named after the
    * RLC_CGTT_MGCG_OVERRIDE register: CPF, RLC, MGCG, CGCG, CGLS and GRBM.
    * The L suffix gives each constant type long.
    */
   78#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
   79#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
   80#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
   81#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
   82#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
   83#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  84
   85/* BPM SERDES CMD: command codes, 1 for "set" and 0 for the CLE_ variant
    * (NOTE(review): CLE_ presumably abbreviates "clear" - confirm at the
    * point of use). */
   86#define SET_BPM_SERDES_CMD    1
   87#define CLE_BPM_SERDES_CMD    0
  88
   89/* BPM register addresses, numbered consecutively starting at 0. */
   90enum {
   91        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
   92        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
   93        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
   94        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
   95        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        /* Evaluates to 5, one past the last address above. */
   96        BPM_REG_FGCG_MAX
   97};
  98
   /*
    * NOTE(review): presumably the length (in entries) of a "direct register
    * list" in an RLC data format; the user of this constant is not visible in
    * this part of the file - confirm before relying on the unit.
    */
   99#define RLC_FormatDirectRegListLength        14
 100
  /*
   * Firmware image names declared by this module, grouped by ASIC name:
   * carrizo, stoney, tonga, topaz, fiji, polaris10/11/12 and vegam.
   * Every group lists ce, pfp, me, mec and rlc images. All groups except
   * stoney and topaz also list a mec2 image. The three polaris groups
   * additionally list "_2" variants of the ce, pfp, me, mec and mec2 images
   * (but not of rlc).
   */
  101MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
  102MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
  103MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
  104MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
  105MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
  106MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
  107
  108MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
  109MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
  110MODULE_FIRMWARE("amdgpu/stoney_me.bin");
  111MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
  112MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
  113
  114MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
  115MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
  116MODULE_FIRMWARE("amdgpu/tonga_me.bin");
  117MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
  118MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
  119MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
  120
  121MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
  122MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
  123MODULE_FIRMWARE("amdgpu/topaz_me.bin");
  124MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
  125MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
  126
  127MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
  128MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
  129MODULE_FIRMWARE("amdgpu/fiji_me.bin");
  130MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
  131MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
  132MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
  133
  134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
  135MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
  136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
  137MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
  138MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
  139MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
  140MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
  141MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
  142MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
  143MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
  144MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
  145
  146MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
  147MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
  148MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
  149MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
  150MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
  151MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
  152MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
  153MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
  154MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
  155MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
  156MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
  157
  158MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
  159MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
  160MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
  161MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
  162MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
  163MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
  164MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
  165MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
  166MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
  167MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
  168MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
  169
  170MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
  171MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
  172MODULE_FIRMWARE("amdgpu/vegam_me.bin");
  173MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
  174MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
  175MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 176
  /*
   * GDS register offsets, one row per VMID from 0 to 15 (16 rows). Each row
   * lists, in order, that VMID's GDS BASE, GDS SIZE, GWS and OA register.
   * NOTE(review): the member names of struct amdgpu_gds_reg_offset are not
   * visible here; the column order is presumed to match the struct layout -
   * confirm against its declaration.
   */
  177static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
  178{
  179        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
  180        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
  181        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
  182        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
  183        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
  184        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
  185        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
  186        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
  187        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
  188        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
  189        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
  190        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
  191        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
  192        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
  193        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
  194        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
  195};
 196
  /*
   * Tonga "golden" register settings. The array is a flat list of triplets:
   * a register offset (mm*) followed by two 32-bit words.
   * NOTE(review): the two words are presumably an AND mask and the value to
   * apply under it; the code that consumes this table is not visible in this
   * part of the file - confirm there. Some rows (e.g. mmTCC_CTRL) carry value
   * bits outside the second word, so the exact combining rule matters.
   */
  197static const u32 golden_settings_tonga_a11[] =
  198{
  199        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
  200        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  201        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  202        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  203        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  204        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
  205        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  206        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
  207        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  208        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  209        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  210        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  211        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
  212        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
  213        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
  214        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  215};
 216
  /*
   * Tonga common register settings, as (register, word, word) triplets.
   * The second word of every row is 0xffffffff. The mmGB_ADDR_CONFIG value
   * equals TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003).
   * NOTE(review): the second word is presumably a write mask (all bits here)
   * and the third the value written; the consumer is not visible in this
   * part of the file - confirm there.
   */
  217static const u32 tonga_golden_common_all[] =
  218{
  219        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  220        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
  221        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
  222        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  223        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  224        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  225        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  226        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
  227};
 228
  /*
   * Tonga MGCG/CGCG init table, as (register, word, word) triplets.
   * Row order: mmRLC_CGTT_MGCG_OVERRIDE set to 0xffffffff, mmGRBM_GFX_INDEX,
   * the block-level *CGTT*_CLK_CTRL registers, mmGRBM_GFX_INDEX again, the
   * per-CU CGTS_CUn_* registers for CU0 through CU7 (five rows per CU; CU0
   * and CU4 use a TA_SQC register where the others use a TA register), and
   * finally mmCGTS_SM_CTRL_REG, mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL
   * and mmCP_MEM_SLP_CNTL.
   * NOTE(review): judging by the register names these are clock-gating
   * controls, and the second word is presumably a write mask; neither is
   * established by code visible in this part of the file - confirm.
   */
  229static const u32 tonga_mgcg_cgcg_init[] =
  230{
  231        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  232        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  233        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  234        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  235        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  236        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  237        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
  238        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  239        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  240        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  241        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  242        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  243        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  244        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  245        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  246        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  247        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  248        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  249        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  250        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  251        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  252        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  253        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  254        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  255        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  256        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  257        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  258        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  259        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  260        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  261        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  262        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  263        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  264        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  265        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  266        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  267        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  268        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  269        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  270        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  271        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  272        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  273        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  274        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  275        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  276        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  277        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  278        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  279        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  280        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  281        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  282        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  283        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  284        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
  285        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  286        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  287        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  288        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  289        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  290        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  291        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  292        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  293        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  294        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
  295        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  296        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  297        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  298        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  299        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
  300        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  301        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  302        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  303        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  304        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  305        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  306};
 307
  /*
   * VegaM "golden" register settings, as (register, word, word) triplets.
   * Unlike the *_golden_common_all tables, the PA_SC_RASTER_CONFIG rows here
   * use partial second words (0x3f3fffff and 0x0000003f).
   * NOTE(review): the second word is presumably an AND mask and the third
   * the value applied under it; the consumer is not visible in this part of
   * the file - confirm there.
   */
  308static const u32 golden_settings_vegam_a11[] =
  309{
  310        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
  311        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
  312        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  313        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  314        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  315        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  316        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
  317        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
  318        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  319        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  320        mmSQ_CONFIG, 0x07f80000, 0x01180000,
  321        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  322        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  323        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
  324        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  325        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
  326        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  327};
 328
  /*
   * VegaM common register settings, as (register, word, word) triplets with
   * 0xffffffff as the second word of every row. There are no
   * PA_SC_RASTER_CONFIG rows in this table. The mmGB_ADDR_CONFIG value
   * (0x22011003) is the same number as TONGA_GB_ADDR_CONFIG_GOLDEN.
   * NOTE(review): second word presumably a write mask, third the value; the
   * consumer is not visible in this part of the file - confirm there.
   */
  329static const u32 vegam_golden_common_all[] =
  330{
  331        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  332        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  333        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  334        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  335        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  336        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  337};
 338
  /*
   * Polaris11 "golden" register settings, as (register, word, word)
   * triplets.
   * NOTE(review): the second word is presumably an AND mask and the third
   * the value applied under it; the consumer is not visible in this part of
   * the file - confirm there.
   */
  339static const u32 golden_settings_polaris11_a11[] =
  340{
  341        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
  342        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
  343        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  344        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  345        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  346        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  347        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
  348        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
  349        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  350        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  351        mmSQ_CONFIG, 0x07f80000, 0x01180000,
  352        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  353        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  354        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
  355        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  356        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
  357        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  358};
 359
  /*
   * Polaris11 common register settings, as (register, word, word) triplets
   * with 0xffffffff as the second word of every row. The mmGB_ADDR_CONFIG
   * value equals POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002). There are no
   * PA_SC_RASTER_CONFIG rows in this table.
   * NOTE(review): second word presumably a write mask, third the value; the
   * consumer is not visible in this part of the file - confirm there.
   */
  360static const u32 polaris11_golden_common_all[] =
  361{
  362        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  363        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
  364        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  365        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  366        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  367        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  368};
 369
  /*
   * Polaris10 "golden" register settings, as (register, word, word)
   * triplets. This is the only golden-settings table visible in this part of
   * the file with an mmATC_MISC_CG row.
   * NOTE(review): the second word is presumably an AND mask and the third
   * the value applied under it; the consumer is not visible here - confirm.
   * NOTE(review): there is an mmTCP_CHAN_STEER_HI row but no
   * mmTCP_CHAN_STEER_LO row, unlike the vegam and polaris11 tables - confirm
   * this is intentional.
   */
  370static const u32 golden_settings_polaris10_a11[] =
  371{
  372        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
  373        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
  374        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
  375        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  376        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  377        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  378        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  379        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
  380        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
  381        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  382        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
  383        mmSQ_CONFIG, 0x07f80000, 0x07180000,
  384        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  385        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  386        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
  387        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  388        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  389};
 390
  /*
   * Polaris10 common register settings, as (register, word, word) triplets
   * with 0xffffffff as the second word of every row. The rows carry the same
   * registers and values as tonga_golden_common_all in this file.
   * NOTE(review): second word presumably a write mask, third the value; the
   * consumer is not visible in this part of the file - confirm there.
   */
  391static const u32 polaris10_golden_common_all[] =
  392{
  393        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  394        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
  395        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
  396        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  397        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  398        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  399        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  400        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  401};
 402
  /*
   * Fiji common register settings, as (register, word, word) triplets.
   * mmGRBM_GFX_INDEX is listed twice with the same value: once at the top
   * and once more immediately before the final mmSPI_CONFIG_CNTL_1 row,
   * which is the only row whose second word is not 0xffffffff.
   * NOTE(review): second word presumably a write mask, third the value; the
   * consumer is not visible in this part of the file - confirm there.
   */
  403static const u32 fiji_golden_common_all[] =
  404{
  405        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  406        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
  407        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
  408        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
  409        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  410        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  411        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  412        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
  413        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  414        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
  415};
 416
  /*
   * Fiji "golden" register settings, as (register, word, word) triplets.
   * NOTE(review): the second word is presumably an AND mask and the third
   * the value applied under it; the consumer is not visible in this part of
   * the file - confirm there.
   */
  417static const u32 golden_settings_fiji_a10[] =
  418{
  419        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
  420        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  421        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  422        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  423        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
  424        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  425        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  426        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  427        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  428        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
  429        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
  430};
 431
  /*
   * Fiji MGCG/CGCG init table, as (register, word, word) triplets.
   * Row order: mmRLC_CGTT_MGCG_OVERRIDE set to 0xffffffff, mmGRBM_GFX_INDEX,
   * the block-level *CGTT*_CLK_CTRL registers, mmGRBM_GFX_INDEX again, then
   * mmCGTS_SM_CTRL_REG, mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL and
   * mmCP_MEM_SLP_CNTL. Unlike the tonga and iceland tables in this file,
   * there are no per-CU CGTS_CUn_* rows.
   * NOTE(review): judging by the register names these are clock-gating
   * controls, and the second word is presumably a write mask; neither is
   * established by code visible in this part of the file - confirm.
   */
  432static const u32 fiji_mgcg_cgcg_init[] =
  433{
  434        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  435        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  436        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  437        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  438        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
  439        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
  440        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
  441        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  442        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  443        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  444        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  445        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  446        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  447        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  448        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  449        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  450        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  451        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  452        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  453        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  454        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  455        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  456        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
  457        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  458        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  459        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  460        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  461        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  462        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  463        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  464        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  465        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  466        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  467        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  468        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
  469};
 470
  /*
   * Iceland "golden" register settings, as (register, word, word) triplets.
   * NOTE(review): the second word is presumably an AND mask and the third
   * the value applied under it; the consumer is not visible in this part of
   * the file - confirm there.
   */
  471static const u32 golden_settings_iceland_a11[] =
  472{
  473        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  474        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  475        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
  476        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  477        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
  478        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  479        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
  480        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
  481        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
  482        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  483        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
  484        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  485        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  486        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
  487        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
  488        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
  489};
 490
  /*
   * Iceland common register settings, as (register, word, word) triplets
   * with 0xffffffff as the second word of every row. The mmGB_ADDR_CONFIG
   * value (0x22010001) is the same number as TOPAZ_GB_ADDR_CONFIG_GOLDEN.
   * The rows carry the same registers and values as cz_golden_common_all in
   * this file.
   * NOTE(review): second word presumably a write mask, third the value; the
   * consumer is not visible in this part of the file - confirm there.
   */
  491static const u32 iceland_golden_common_all[] =
  492{
  493        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  494        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
  495        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
  496        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
  497        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  498        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  499        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  500        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
  501};
 502
  /*
   * Iceland MGCG/CGCG init table, as (register, word, word) triplets.
   * Row order: mmRLC_CGTT_MGCG_OVERRIDE set to 0xffffffff, mmGRBM_GFX_INDEX,
   * the block-level *CGTT*_CLK_CTRL registers, mmGRBM_GFX_INDEX again, the
   * per-CU CGTS_CUn_* registers for CU0 through CU5 (five rows per CU), and
   * finally mmCGTS_SM_CTRL_REG, mmCP_RB_WPTR_POLL_CNTL and
   * mmRLC_CGCG_CGLS_CTRL. There is no mmCP_MEM_SLP_CNTL row.
   * Values that differ from the tonga table in this file: the CP, CPC and
   * CPF CLK_CTRL rows use 0xc0000100, the TCI row uses 0xff000100, and the
   * CU0/CU4 TA_SQC rows use 0x0f840f87.
   * NOTE(review): judging by the register names these are clock-gating
   * controls, and the second word is presumably a write mask; neither is
   * established by code visible in this part of the file - confirm.
   */
  503static const u32 iceland_mgcg_cgcg_init[] =
  504{
  505        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
  506        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  507        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  508        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
  509        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
  510        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
  511        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
  512        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
  513        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
  514        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
  515        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
  516        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
  517        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
  518        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
  519        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
  520        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
  521        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
  522        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
  523        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
  524        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
  525        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
  526        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
  527        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
  528        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
  529        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
  530        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
  531        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
  532        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  533        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
  534        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
  535        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  536        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  537        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  538        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
  539        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  540        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  541        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  542        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  543        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
  544        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  545        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  546        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  547        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  548        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
  549        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  550        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  551        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  552        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  553        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
  554        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  555        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  556        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  557        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  558        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
  559        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  560        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  561        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
  562        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
  563        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
  564        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
  565        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
  566        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
  567        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
  568        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
  569};
 570
  /*
   * CZ "golden" register settings, as (register, word, word) triplets.
   * NOTE(review): the second word is presumably an AND mask and the third
   * the value applied under it; the consumer is not visible in this part of
   * the file - confirm there.
   * NOTE(review): the mmTCP_ADDR_CONFIG row uses 0x0000000f as its second
   * word while the other golden-settings tables in this file use 0x000003ff
   * for that register, and its third word (0x000000f3) has bits outside
   * 0x0000000f - confirm this is intentional.
   */
  571static const u32 cz_golden_settings_a11[] =
  572{
  573        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
  574        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
  575        mmGB_GPU_ID, 0x0000000f, 0x00000000,
  576        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
  577        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
  578        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
  579        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
  580        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
  581        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
  582        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
  583        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
  584        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
  585};
 586
  /*
   * CZ common register settings, as (register, word, word) triplets with
   * 0xffffffff as the second word of every row. The mmGB_ADDR_CONFIG value
   * equals CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001). The rows carry the
   * same registers and values as iceland_golden_common_all in this file.
   * NOTE(review): second word presumably a write mask, third the value; the
   * consumer is not visible in this part of the file - confirm there.
   */
  587static const u32 cz_golden_common_all[] =
  588{
  589        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
  590        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
  591        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
  592        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
  593        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
  594        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
  595        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
  596        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
  597};
 598
 599static const u32 cz_mgcg_cgcg_init[] =
 600{
 601        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 602        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 603        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 604        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 605        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 606        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 607        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 608        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 609        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 610        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 611        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 612        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 613        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 614        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 615        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 616        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 617        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 618        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 619        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 620        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 621        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 622        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 623        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 624        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 625        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 626        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 627        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 628        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 629        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 630        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 631        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 632        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 633        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 634        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 635        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 636        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 637        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 638        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 639        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 640        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 641        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 642        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 643        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 644        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 645        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 646        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 647        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 648        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 649        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 650        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 651        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 652        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 653        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 654        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 655        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 656        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 657        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 658        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 659        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 660        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 661        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 662        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 663        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 664        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 665        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 666        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 667        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 668        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 669        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 670        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 671        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 672        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 673        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 674        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 675        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 676};
 677
/*
 * Stoney golden settings table, programmed by
 * gfx_v8_0_init_golden_registers() for CHIP_STONEY.
 * NOTE(review): entries look like {register, mask, value} triples consumed by
 * amdgpu_device_program_register_sequence() - confirm against that helper.
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
 691
/*
 * Stoney common golden register table; it is the last of the three tables
 * programmed by gfx_v8_0_init_golden_registers() for CHIP_STONEY.
 * NOTE(review): entries look like {register, mask, value} triples - confirm
 * against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
 703
/*
 * Stoney clock-gating related register table; it is the first of the three
 * tables programmed by gfx_v8_0_init_golden_registers() for CHIP_STONEY.
 * NOTE(review): entries look like {register, mask, value} triples - confirm
 * against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
 712
 713
/*
 * Human-readable descriptions of the SQ EDC error sources.
 * NOTE(review): presumably indexed by the SOURCE field of SQ_EDC_INFO, so the
 * order of the entries matters - confirm at the use site.
 */
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
 723
/* Forward declarations of file-local helpers that are used before their definitions. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 732
/*
 * Mask and shift of the ACLK divider field in the SMC register CG_ACLK_CNTL;
 * used by gfx_v8_0_init_golden_registers() when programming Polaris10.
 */
#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
 735
 736static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 737{
 738        uint32_t data;
 739
 740        switch (adev->asic_type) {
 741        case CHIP_TOPAZ:
 742                amdgpu_device_program_register_sequence(adev,
 743                                                        iceland_mgcg_cgcg_init,
 744                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
 745                amdgpu_device_program_register_sequence(adev,
 746                                                        golden_settings_iceland_a11,
 747                                                        ARRAY_SIZE(golden_settings_iceland_a11));
 748                amdgpu_device_program_register_sequence(adev,
 749                                                        iceland_golden_common_all,
 750                                                        ARRAY_SIZE(iceland_golden_common_all));
 751                break;
 752        case CHIP_FIJI:
 753                amdgpu_device_program_register_sequence(adev,
 754                                                        fiji_mgcg_cgcg_init,
 755                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
 756                amdgpu_device_program_register_sequence(adev,
 757                                                        golden_settings_fiji_a10,
 758                                                        ARRAY_SIZE(golden_settings_fiji_a10));
 759                amdgpu_device_program_register_sequence(adev,
 760                                                        fiji_golden_common_all,
 761                                                        ARRAY_SIZE(fiji_golden_common_all));
 762                break;
 763
 764        case CHIP_TONGA:
 765                amdgpu_device_program_register_sequence(adev,
 766                                                        tonga_mgcg_cgcg_init,
 767                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
 768                amdgpu_device_program_register_sequence(adev,
 769                                                        golden_settings_tonga_a11,
 770                                                        ARRAY_SIZE(golden_settings_tonga_a11));
 771                amdgpu_device_program_register_sequence(adev,
 772                                                        tonga_golden_common_all,
 773                                                        ARRAY_SIZE(tonga_golden_common_all));
 774                break;
 775        case CHIP_VEGAM:
 776                amdgpu_device_program_register_sequence(adev,
 777                                                        golden_settings_vegam_a11,
 778                                                        ARRAY_SIZE(golden_settings_vegam_a11));
 779                amdgpu_device_program_register_sequence(adev,
 780                                                        vegam_golden_common_all,
 781                                                        ARRAY_SIZE(vegam_golden_common_all));
 782                break;
 783        case CHIP_POLARIS11:
 784        case CHIP_POLARIS12:
 785                amdgpu_device_program_register_sequence(adev,
 786                                                        golden_settings_polaris11_a11,
 787                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
 788                amdgpu_device_program_register_sequence(adev,
 789                                                        polaris11_golden_common_all,
 790                                                        ARRAY_SIZE(polaris11_golden_common_all));
 791                break;
 792        case CHIP_POLARIS10:
 793                amdgpu_device_program_register_sequence(adev,
 794                                                        golden_settings_polaris10_a11,
 795                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
 796                amdgpu_device_program_register_sequence(adev,
 797                                                        polaris10_golden_common_all,
 798                                                        ARRAY_SIZE(polaris10_golden_common_all));
 799                data = RREG32_SMC(ixCG_ACLK_CNTL);
 800                data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
 801                data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
 802                WREG32_SMC(ixCG_ACLK_CNTL, data);
 803                if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
 804                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 805                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 806                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
 807                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 808                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 809                }
 810                break;
 811        case CHIP_CARRIZO:
 812                amdgpu_device_program_register_sequence(adev,
 813                                                        cz_mgcg_cgcg_init,
 814                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
 815                amdgpu_device_program_register_sequence(adev,
 816                                                        cz_golden_settings_a11,
 817                                                        ARRAY_SIZE(cz_golden_settings_a11));
 818                amdgpu_device_program_register_sequence(adev,
 819                                                        cz_golden_common_all,
 820                                                        ARRAY_SIZE(cz_golden_common_all));
 821                break;
 822        case CHIP_STONEY:
 823                amdgpu_device_program_register_sequence(adev,
 824                                                        stoney_mgcg_cgcg_init,
 825                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
 826                amdgpu_device_program_register_sequence(adev,
 827                                                        stoney_golden_settings_a11,
 828                                                        ARRAY_SIZE(stoney_golden_settings_a11));
 829                amdgpu_device_program_register_sequence(adev,
 830                                                        stoney_golden_common_all,
 831                                                        ARRAY_SIZE(stoney_golden_common_all));
 832                break;
 833        default:
 834                break;
 835        }
 836}
 837
 838static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 839{
 840        adev->gfx.scratch.num_reg = 8;
 841        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 842        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 843}
 844
 845static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 846{
 847        struct amdgpu_device *adev = ring->adev;
 848        uint32_t scratch;
 849        uint32_t tmp = 0;
 850        unsigned i;
 851        int r;
 852
 853        r = amdgpu_gfx_scratch_get(adev, &scratch);
 854        if (r)
 855                return r;
 856
 857        WREG32(scratch, 0xCAFEDEAD);
 858        r = amdgpu_ring_alloc(ring, 3);
 859        if (r)
 860                goto error_free_scratch;
 861
 862        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 863        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 864        amdgpu_ring_write(ring, 0xDEADBEEF);
 865        amdgpu_ring_commit(ring);
 866
 867        for (i = 0; i < adev->usec_timeout; i++) {
 868                tmp = RREG32(scratch);
 869                if (tmp == 0xDEADBEEF)
 870                        break;
 871                udelay(1);
 872        }
 873
 874        if (i >= adev->usec_timeout)
 875                r = -ETIMEDOUT;
 876
 877error_free_scratch:
 878        amdgpu_gfx_scratch_free(adev, scratch);
 879        return r;
 880}
 881
 882static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 883{
 884        struct amdgpu_device *adev = ring->adev;
 885        struct amdgpu_ib ib;
 886        struct dma_fence *f = NULL;
 887
 888        unsigned int index;
 889        uint64_t gpu_addr;
 890        uint32_t tmp;
 891        long r;
 892
 893        r = amdgpu_device_wb_get(adev, &index);
 894        if (r)
 895                return r;
 896
 897        gpu_addr = adev->wb.gpu_addr + (index * 4);
 898        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 899        memset(&ib, 0, sizeof(ib));
 900        r = amdgpu_ib_get(adev, NULL, 16,
 901                                        AMDGPU_IB_POOL_DIRECT, &ib);
 902        if (r)
 903                goto err1;
 904
 905        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 906        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 907        ib.ptr[2] = lower_32_bits(gpu_addr);
 908        ib.ptr[3] = upper_32_bits(gpu_addr);
 909        ib.ptr[4] = 0xDEADBEEF;
 910        ib.length_dw = 5;
 911
 912        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 913        if (r)
 914                goto err2;
 915
 916        r = dma_fence_wait_timeout(f, false, timeout);
 917        if (r == 0) {
 918                r = -ETIMEDOUT;
 919                goto err2;
 920        } else if (r < 0) {
 921                goto err2;
 922        }
 923
 924        tmp = adev->wb.wb[index];
 925        if (tmp == 0xDEADBEEF)
 926                r = 0;
 927        else
 928                r = -EINVAL;
 929
 930err2:
 931        amdgpu_ib_free(adev, &ib, NULL);
 932        dma_fence_put(f);
 933err1:
 934        amdgpu_device_wb_free(adev, index);
 935        return r;
 936}
 937
 938
 939static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 940{
 941        release_firmware(adev->gfx.pfp_fw);
 942        adev->gfx.pfp_fw = NULL;
 943        release_firmware(adev->gfx.me_fw);
 944        adev->gfx.me_fw = NULL;
 945        release_firmware(adev->gfx.ce_fw);
 946        adev->gfx.ce_fw = NULL;
 947        release_firmware(adev->gfx.rlc_fw);
 948        adev->gfx.rlc_fw = NULL;
 949        release_firmware(adev->gfx.mec_fw);
 950        adev->gfx.mec_fw = NULL;
 951        if ((adev->asic_type != CHIP_STONEY) &&
 952            (adev->asic_type != CHIP_TOPAZ))
 953                release_firmware(adev->gfx.mec2_fw);
 954        adev->gfx.mec2_fw = NULL;
 955
 956        kfree(adev->gfx.rlc.register_list_format);
 957}
 958
 959static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 960{
 961        const char *chip_name;
 962        char fw_name[30];
 963        int err;
 964        struct amdgpu_firmware_info *info = NULL;
 965        const struct common_firmware_header *header = NULL;
 966        const struct gfx_firmware_header_v1_0 *cp_hdr;
 967        const struct rlc_firmware_header_v2_0 *rlc_hdr;
 968        unsigned int *tmp = NULL, i;
 969
 970        DRM_DEBUG("\n");
 971
 972        switch (adev->asic_type) {
 973        case CHIP_TOPAZ:
 974                chip_name = "topaz";
 975                break;
 976        case CHIP_TONGA:
 977                chip_name = "tonga";
 978                break;
 979        case CHIP_CARRIZO:
 980                chip_name = "carrizo";
 981                break;
 982        case CHIP_FIJI:
 983                chip_name = "fiji";
 984                break;
 985        case CHIP_STONEY:
 986                chip_name = "stoney";
 987                break;
 988        case CHIP_POLARIS10:
 989                chip_name = "polaris10";
 990                break;
 991        case CHIP_POLARIS11:
 992                chip_name = "polaris11";
 993                break;
 994        case CHIP_POLARIS12:
 995                chip_name = "polaris12";
 996                break;
 997        case CHIP_VEGAM:
 998                chip_name = "vegam";
 999                break;
1000        default:
1001                BUG();
1002        }
1003
1004        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1005                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1006                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1007                if (err == -ENOENT) {
1008                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1009                        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1010                }
1011        } else {
1012                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1013                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1014        }
1015        if (err)
1016                goto out;
1017        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1018        if (err)
1019                goto out;
1020        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1021        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1022        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1023
1024        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1025                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1026                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1027                if (err == -ENOENT) {
1028                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1029                        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1030                }
1031        } else {
1032                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1033                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1034        }
1035        if (err)
1036                goto out;
1037        err = amdgpu_ucode_validate(adev->gfx.me_fw);
1038        if (err)
1039                goto out;
1040        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1041        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1042
1043        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1044
1045        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1046                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1047                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1048                if (err == -ENOENT) {
1049                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1050                        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1051                }
1052        } else {
1053                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1054                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1055        }
1056        if (err)
1057                goto out;
1058        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1059        if (err)
1060                goto out;
1061        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1062        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1063        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1064
1065        /*
1066         * Support for MCBP/Virtualization in combination with chained IBs is
1067         * formal released on feature version #46
1068         */
1069        if (adev->gfx.ce_feature_version >= 46 &&
1070            adev->gfx.pfp_feature_version >= 46) {
1071                adev->virt.chained_ib_support = true;
1072                DRM_INFO("Chained IB support enabled!\n");
1073        } else
1074                adev->virt.chained_ib_support = false;
1075
1076        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1077        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1078        if (err)
1079                goto out;
1080        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1081        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1082        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1083        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1084
1085        adev->gfx.rlc.save_and_restore_offset =
1086                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
1087        adev->gfx.rlc.clear_state_descriptor_offset =
1088                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1089        adev->gfx.rlc.avail_scratch_ram_locations =
1090                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1091        adev->gfx.rlc.reg_restore_list_size =
1092                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
1093        adev->gfx.rlc.reg_list_format_start =
1094                        le32_to_cpu(rlc_hdr->reg_list_format_start);
1095        adev->gfx.rlc.reg_list_format_separate_start =
1096                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1097        adev->gfx.rlc.starting_offsets_start =
1098                        le32_to_cpu(rlc_hdr->starting_offsets_start);
1099        adev->gfx.rlc.reg_list_format_size_bytes =
1100                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1101        adev->gfx.rlc.reg_list_size_bytes =
1102                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1103
1104        adev->gfx.rlc.register_list_format =
1105                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1106                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1107
1108        if (!adev->gfx.rlc.register_list_format) {
1109                err = -ENOMEM;
1110                goto out;
1111        }
1112
1113        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1114                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1115        for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1116                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1117
1118        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1119
1120        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1121                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1122        for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1123                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1124
1125        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1126                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1127                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1128                if (err == -ENOENT) {
1129                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1130                        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1131                }
1132        } else {
1133                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1134                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1135        }
1136        if (err)
1137                goto out;
1138        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1139        if (err)
1140                goto out;
1141        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1142        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1143        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1144
1145        if ((adev->asic_type != CHIP_STONEY) &&
1146            (adev->asic_type != CHIP_TOPAZ)) {
1147                if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1148                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1149                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1150                        if (err == -ENOENT) {
1151                                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1152                                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1153                        }
1154                } else {
1155                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1156                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1157                }
1158                if (!err) {
1159                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1160                        if (err)
1161                                goto out;
1162                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1163                                adev->gfx.mec2_fw->data;
1164                        adev->gfx.mec2_fw_version =
1165                                le32_to_cpu(cp_hdr->header.ucode_version);
1166                        adev->gfx.mec2_feature_version =
1167                                le32_to_cpu(cp_hdr->ucode_feature_version);
1168                } else {
1169                        err = 0;
1170                        adev->gfx.mec2_fw = NULL;
1171                }
1172        }
1173
1174        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1175        info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1176        info->fw = adev->gfx.pfp_fw;
1177        header = (const struct common_firmware_header *)info->fw->data;
1178        adev->firmware.fw_size +=
1179                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1182        info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1183        info->fw = adev->gfx.me_fw;
1184        header = (const struct common_firmware_header *)info->fw->data;
1185        adev->firmware.fw_size +=
1186                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1189        info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1190        info->fw = adev->gfx.ce_fw;
1191        header = (const struct common_firmware_header *)info->fw->data;
1192        adev->firmware.fw_size +=
1193                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1196        info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1197        info->fw = adev->gfx.rlc_fw;
1198        header = (const struct common_firmware_header *)info->fw->data;
1199        adev->firmware.fw_size +=
1200                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1201
1202        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1203        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1204        info->fw = adev->gfx.mec_fw;
1205        header = (const struct common_firmware_header *)info->fw->data;
1206        adev->firmware.fw_size +=
1207                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1208
1209        /* we need account JT in */
1210        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1211        adev->firmware.fw_size +=
1212                ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1213
1214        if (amdgpu_sriov_vf(adev)) {
1215                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1216                info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1217                info->fw = adev->gfx.mec_fw;
1218                adev->firmware.fw_size +=
1219                        ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1220        }
1221
1222        if (adev->gfx.mec2_fw) {
1223                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1224                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1225                info->fw = adev->gfx.mec2_fw;
1226                header = (const struct common_firmware_header *)info->fw->data;
1227                adev->firmware.fw_size +=
1228                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1229        }
1230
1231out:
1232        if (err) {
1233                dev_err(adev->dev,
1234                        "gfx8: Failed to load firmware \"%s\"\n",
1235                        fw_name);
1236                release_firmware(adev->gfx.pfp_fw);
1237                adev->gfx.pfp_fw = NULL;
1238                release_firmware(adev->gfx.me_fw);
1239                adev->gfx.me_fw = NULL;
1240                release_firmware(adev->gfx.ce_fw);
1241                adev->gfx.ce_fw = NULL;
1242                release_firmware(adev->gfx.rlc_fw);
1243                adev->gfx.rlc_fw = NULL;
1244                release_firmware(adev->gfx.mec_fw);
1245                adev->gfx.mec_fw = NULL;
1246                release_firmware(adev->gfx.mec2_fw);
1247                adev->gfx.mec2_fw = NULL;
1248        }
1249        return err;
1250}
1251
1252static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1253                                    volatile u32 *buffer)
1254{
1255        u32 count = 0, i;
1256        const struct cs_section_def *sect = NULL;
1257        const struct cs_extent_def *ext = NULL;
1258
1259        if (adev->gfx.rlc.cs_data == NULL)
1260                return;
1261        if (buffer == NULL)
1262                return;
1263
1264        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1265        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1266
1267        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1268        buffer[count++] = cpu_to_le32(0x80000000);
1269        buffer[count++] = cpu_to_le32(0x80000000);
1270
1271        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1272                for (ext = sect->section; ext->extent != NULL; ++ext) {
1273                        if (sect->id == SECT_CONTEXT) {
1274                                buffer[count++] =
1275                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1276                                buffer[count++] = cpu_to_le32(ext->reg_index -
1277                                                PACKET3_SET_CONTEXT_REG_START);
1278                                for (i = 0; i < ext->reg_count; i++)
1279                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
1280                        } else {
1281                                return;
1282                        }
1283                }
1284        }
1285
1286        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1287        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1288                        PACKET3_SET_CONTEXT_REG_START);
1289        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1290        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1291
1292        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1293        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1294
1295        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1296        buffer[count++] = cpu_to_le32(0);
1297}
1298
1299static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1300{
1301        if (adev->asic_type == CHIP_CARRIZO)
1302                return 5;
1303        else
1304                return 4;
1305}
1306
1307static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1308{
1309        const struct cs_section_def *cs_data;
1310        int r;
1311
1312        adev->gfx.rlc.cs_data = vi_cs_data;
1313
1314        cs_data = adev->gfx.rlc.cs_data;
1315
1316        if (cs_data) {
1317                /* init clear state block */
1318                r = amdgpu_gfx_rlc_init_csb(adev);
1319                if (r)
1320                        return r;
1321        }
1322
1323        if ((adev->asic_type == CHIP_CARRIZO) ||
1324            (adev->asic_type == CHIP_STONEY)) {
1325                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1326                r = amdgpu_gfx_rlc_init_cpt(adev);
1327                if (r)
1328                        return r;
1329        }
1330
1331        /* init spm vmid with 0xf */
1332        if (adev->gfx.rlc.funcs->update_spm_vmid)
1333                adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1334
1335        return 0;
1336}
1337
1338static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1339{
1340        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1341}
1342
1343static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1344{
1345        int r;
1346        u32 *hpd;
1347        size_t mec_hpd_size;
1348
1349        bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1350
1351        /* take ownership of the relevant compute queues */
1352        amdgpu_gfx_compute_queue_acquire(adev);
1353
1354        mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1355        if (mec_hpd_size) {
1356                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1357                                              AMDGPU_GEM_DOMAIN_VRAM,
1358                                              &adev->gfx.mec.hpd_eop_obj,
1359                                              &adev->gfx.mec.hpd_eop_gpu_addr,
1360                                              (void **)&hpd);
1361                if (r) {
1362                        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1363                        return r;
1364                }
1365
1366                memset(hpd, 0, mec_hpd_size);
1367
1368                amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1369                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1370        }
1371
1372        return 0;
1373}
1374
1375static const u32 vgpr_init_compute_shader[] =
1376{
1377        0x7e000209, 0x7e020208,
1378        0x7e040207, 0x7e060206,
1379        0x7e080205, 0x7e0a0204,
1380        0x7e0c0203, 0x7e0e0202,
1381        0x7e100201, 0x7e120200,
1382        0x7e140209, 0x7e160208,
1383        0x7e180207, 0x7e1a0206,
1384        0x7e1c0205, 0x7e1e0204,
1385        0x7e200203, 0x7e220202,
1386        0x7e240201, 0x7e260200,
1387        0x7e280209, 0x7e2a0208,
1388        0x7e2c0207, 0x7e2e0206,
1389        0x7e300205, 0x7e320204,
1390        0x7e340203, 0x7e360202,
1391        0x7e380201, 0x7e3a0200,
1392        0x7e3c0209, 0x7e3e0208,
1393        0x7e400207, 0x7e420206,
1394        0x7e440205, 0x7e460204,
1395        0x7e480203, 0x7e4a0202,
1396        0x7e4c0201, 0x7e4e0200,
1397        0x7e500209, 0x7e520208,
1398        0x7e540207, 0x7e560206,
1399        0x7e580205, 0x7e5a0204,
1400        0x7e5c0203, 0x7e5e0202,
1401        0x7e600201, 0x7e620200,
1402        0x7e640209, 0x7e660208,
1403        0x7e680207, 0x7e6a0206,
1404        0x7e6c0205, 0x7e6e0204,
1405        0x7e700203, 0x7e720202,
1406        0x7e740201, 0x7e760200,
1407        0x7e780209, 0x7e7a0208,
1408        0x7e7c0207, 0x7e7e0206,
1409        0xbf8a0000, 0xbf810000,
1410};
1411
1412static const u32 sgpr_init_compute_shader[] =
1413{
1414        0xbe8a0100, 0xbe8c0102,
1415        0xbe8e0104, 0xbe900106,
1416        0xbe920108, 0xbe940100,
1417        0xbe960102, 0xbe980104,
1418        0xbe9a0106, 0xbe9c0108,
1419        0xbe9e0100, 0xbea00102,
1420        0xbea20104, 0xbea40106,
1421        0xbea60108, 0xbea80100,
1422        0xbeaa0102, 0xbeac0104,
1423        0xbeae0106, 0xbeb00108,
1424        0xbeb20100, 0xbeb40102,
1425        0xbeb60104, 0xbeb80106,
1426        0xbeba0108, 0xbebc0100,
1427        0xbebe0102, 0xbec00104,
1428        0xbec20106, 0xbec40108,
1429        0xbec60100, 0xbec80102,
1430        0xbee60004, 0xbee70005,
1431        0xbeea0006, 0xbeeb0007,
1432        0xbee80008, 0xbee90009,
1433        0xbefc0000, 0xbf8a0000,
1434        0xbf810000, 0x00000000,
1435};
1436
1437static const u32 vgpr_init_regs[] =
1438{
1439        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1440        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1441        mmCOMPUTE_NUM_THREAD_X, 256*4,
1442        mmCOMPUTE_NUM_THREAD_Y, 1,
1443        mmCOMPUTE_NUM_THREAD_Z, 1,
1444        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1445        mmCOMPUTE_PGM_RSRC2, 20,
1446        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1447        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1448        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1449        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1450        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1451        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1452        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1453        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1454        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1455        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1456};
1457
1458static const u32 sgpr1_init_regs[] =
1459{
1460        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1461        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1462        mmCOMPUTE_NUM_THREAD_X, 256*5,
1463        mmCOMPUTE_NUM_THREAD_Y, 1,
1464        mmCOMPUTE_NUM_THREAD_Z, 1,
1465        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1466        mmCOMPUTE_PGM_RSRC2, 20,
1467        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1468        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1469        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1470        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1471        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1472        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1473        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1474        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1475        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1476        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1477};
1478
1479static const u32 sgpr2_init_regs[] =
1480{
1481        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1482        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1483        mmCOMPUTE_NUM_THREAD_X, 256*5,
1484        mmCOMPUTE_NUM_THREAD_Y, 1,
1485        mmCOMPUTE_NUM_THREAD_Z, 1,
1486        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1487        mmCOMPUTE_PGM_RSRC2, 20,
1488        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1489        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1490        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1491        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1492        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1493        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1494        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1495        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1496        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1497        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1498};
1499
1500static const u32 sec_ded_counter_registers[] =
1501{
1502        mmCPC_EDC_ATC_CNT,
1503        mmCPC_EDC_SCRATCH_CNT,
1504        mmCPC_EDC_UCODE_CNT,
1505        mmCPF_EDC_ATC_CNT,
1506        mmCPF_EDC_ROQ_CNT,
1507        mmCPF_EDC_TAG_CNT,
1508        mmCPG_EDC_ATC_CNT,
1509        mmCPG_EDC_DMA_CNT,
1510        mmCPG_EDC_TAG_CNT,
1511        mmDC_EDC_CSINVOC_CNT,
1512        mmDC_EDC_RESTORE_CNT,
1513        mmDC_EDC_STATE_CNT,
1514        mmGDS_EDC_CNT,
1515        mmGDS_EDC_GRBM_CNT,
1516        mmGDS_EDC_OA_DED,
1517        mmSPI_EDC_CNT,
1518        mmSQC_ATC_EDC_GATCL1_CNT,
1519        mmSQC_EDC_CNT,
1520        mmSQ_EDC_DED_CNT,
1521        mmSQ_EDC_INFO,
1522        mmSQ_EDC_SEC_CNT,
1523        mmTCC_EDC_CNT,
1524        mmTCP_ATC_EDC_GATCL1_CNT,
1525        mmTCP_EDC_CNT,
1526        mmTD_EDC_CNT
1527};
1528
1529static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1530{
1531        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1532        struct amdgpu_ib ib;
1533        struct dma_fence *f = NULL;
1534        int r, i;
1535        u32 tmp;
1536        unsigned total_size, vgpr_offset, sgpr_offset;
1537        u64 gpu_addr;
1538
1539        /* only supported on CZ */
1540        if (adev->asic_type != CHIP_CARRIZO)
1541                return 0;
1542
1543        /* bail if the compute ring is not ready */
1544        if (!ring->sched.ready)
1545                return 0;
1546
1547        tmp = RREG32(mmGB_EDC_MODE);
1548        WREG32(mmGB_EDC_MODE, 0);
1549
1550        total_size =
1551                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1552        total_size +=
1553                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1554        total_size +=
1555                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1556        total_size = ALIGN(total_size, 256);
1557        vgpr_offset = total_size;
1558        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1559        sgpr_offset = total_size;
1560        total_size += sizeof(sgpr_init_compute_shader);
1561
1562        /* allocate an indirect buffer to put the commands in */
1563        memset(&ib, 0, sizeof(ib));
1564        r = amdgpu_ib_get(adev, NULL, total_size,
1565                                        AMDGPU_IB_POOL_DIRECT, &ib);
1566        if (r) {
1567                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1568                return r;
1569        }
1570
1571        /* load the compute shaders */
1572        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1573                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1574
1575        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1576                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1577
1578        /* init the ib length to 0 */
1579        ib.length_dw = 0;
1580
1581        /* VGPR */
1582        /* write the register state for the compute dispatch */
1583        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1584                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1585                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1586                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1587        }
1588        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1589        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1590        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1591        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1592        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1593        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1594
1595        /* write dispatch packet */
1596        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1597        ib.ptr[ib.length_dw++] = 8; /* x */
1598        ib.ptr[ib.length_dw++] = 1; /* y */
1599        ib.ptr[ib.length_dw++] = 1; /* z */
1600        ib.ptr[ib.length_dw++] =
1601                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1602
1603        /* write CS partial flush packet */
1604        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1605        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1606
1607        /* SGPR1 */
1608        /* write the register state for the compute dispatch */
1609        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1610                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1611                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1612                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1613        }
1614        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1615        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1616        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1617        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1618        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1619        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1620
1621        /* write dispatch packet */
1622        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1623        ib.ptr[ib.length_dw++] = 8; /* x */
1624        ib.ptr[ib.length_dw++] = 1; /* y */
1625        ib.ptr[ib.length_dw++] = 1; /* z */
1626        ib.ptr[ib.length_dw++] =
1627                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1628
1629        /* write CS partial flush packet */
1630        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1631        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1632
1633        /* SGPR2 */
1634        /* write the register state for the compute dispatch */
1635        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1636                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1637                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1638                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1639        }
1640        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1641        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1642        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1643        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1644        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1645        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1646
1647        /* write dispatch packet */
1648        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1649        ib.ptr[ib.length_dw++] = 8; /* x */
1650        ib.ptr[ib.length_dw++] = 1; /* y */
1651        ib.ptr[ib.length_dw++] = 1; /* z */
1652        ib.ptr[ib.length_dw++] =
1653                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1654
1655        /* write CS partial flush packet */
1656        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1657        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1658
1659        /* shedule the ib on the ring */
1660        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1661        if (r) {
1662                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1663                goto fail;
1664        }
1665
1666        /* wait for the GPU to finish processing the IB */
1667        r = dma_fence_wait(f, false);
1668        if (r) {
1669                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1670                goto fail;
1671        }
1672
1673        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1674        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1675        WREG32(mmGB_EDC_MODE, tmp);
1676
1677        tmp = RREG32(mmCC_GC_EDC_CONFIG);
1678        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1679        WREG32(mmCC_GC_EDC_CONFIG, tmp);
1680
1681
1682        /* read back registers to clear the counters */
1683        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1684                RREG32(sec_ded_counter_registers[i]);
1685
1686fail:
1687        amdgpu_ib_free(adev, &ib, NULL);
1688        dma_fence_put(f);
1689
1690        return r;
1691}
1692
1693static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1694{
1695        u32 gb_addr_config;
1696        u32 mc_arb_ramcfg;
1697        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1698        u32 tmp;
1699        int ret;
1700
1701        switch (adev->asic_type) {
1702        case CHIP_TOPAZ:
1703                adev->gfx.config.max_shader_engines = 1;
1704                adev->gfx.config.max_tile_pipes = 2;
1705                adev->gfx.config.max_cu_per_sh = 6;
1706                adev->gfx.config.max_sh_per_se = 1;
1707                adev->gfx.config.max_backends_per_se = 2;
1708                adev->gfx.config.max_texture_channel_caches = 2;
1709                adev->gfx.config.max_gprs = 256;
1710                adev->gfx.config.max_gs_threads = 32;
1711                adev->gfx.config.max_hw_contexts = 8;
1712
1713                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1714                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1715                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1716                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1717                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1718                break;
1719        case CHIP_FIJI:
1720                adev->gfx.config.max_shader_engines = 4;
1721                adev->gfx.config.max_tile_pipes = 16;
1722                adev->gfx.config.max_cu_per_sh = 16;
1723                adev->gfx.config.max_sh_per_se = 1;
1724                adev->gfx.config.max_backends_per_se = 4;
1725                adev->gfx.config.max_texture_channel_caches = 16;
1726                adev->gfx.config.max_gprs = 256;
1727                adev->gfx.config.max_gs_threads = 32;
1728                adev->gfx.config.max_hw_contexts = 8;
1729
1730                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1731                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1732                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1733                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1734                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1735                break;
1736        case CHIP_POLARIS11:
1737        case CHIP_POLARIS12:
1738                ret = amdgpu_atombios_get_gfx_info(adev);
1739                if (ret)
1740                        return ret;
1741                adev->gfx.config.max_gprs = 256;
1742                adev->gfx.config.max_gs_threads = 32;
1743                adev->gfx.config.max_hw_contexts = 8;
1744
1745                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1746                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1747                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1748                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1749                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1750                break;
1751        case CHIP_POLARIS10:
1752        case CHIP_VEGAM:
1753                ret = amdgpu_atombios_get_gfx_info(adev);
1754                if (ret)
1755                        return ret;
1756                adev->gfx.config.max_gprs = 256;
1757                adev->gfx.config.max_gs_threads = 32;
1758                adev->gfx.config.max_hw_contexts = 8;
1759
1760                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1761                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1762                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1763                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1764                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1765                break;
1766        case CHIP_TONGA:
1767                adev->gfx.config.max_shader_engines = 4;
1768                adev->gfx.config.max_tile_pipes = 8;
1769                adev->gfx.config.max_cu_per_sh = 8;
1770                adev->gfx.config.max_sh_per_se = 1;
1771                adev->gfx.config.max_backends_per_se = 2;
1772                adev->gfx.config.max_texture_channel_caches = 8;
1773                adev->gfx.config.max_gprs = 256;
1774                adev->gfx.config.max_gs_threads = 32;
1775                adev->gfx.config.max_hw_contexts = 8;
1776
1777                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782                break;
1783        case CHIP_CARRIZO:
1784                adev->gfx.config.max_shader_engines = 1;
1785                adev->gfx.config.max_tile_pipes = 2;
1786                adev->gfx.config.max_sh_per_se = 1;
1787                adev->gfx.config.max_backends_per_se = 2;
1788                adev->gfx.config.max_cu_per_sh = 8;
1789                adev->gfx.config.max_texture_channel_caches = 2;
1790                adev->gfx.config.max_gprs = 256;
1791                adev->gfx.config.max_gs_threads = 32;
1792                adev->gfx.config.max_hw_contexts = 8;
1793
1794                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1799                break;
1800        case CHIP_STONEY:
1801                adev->gfx.config.max_shader_engines = 1;
1802                adev->gfx.config.max_tile_pipes = 2;
1803                adev->gfx.config.max_sh_per_se = 1;
1804                adev->gfx.config.max_backends_per_se = 1;
1805                adev->gfx.config.max_cu_per_sh = 3;
1806                adev->gfx.config.max_texture_channel_caches = 2;
1807                adev->gfx.config.max_gprs = 256;
1808                adev->gfx.config.max_gs_threads = 16;
1809                adev->gfx.config.max_hw_contexts = 8;
1810
1811                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816                break;
1817        default:
1818                adev->gfx.config.max_shader_engines = 2;
1819                adev->gfx.config.max_tile_pipes = 4;
1820                adev->gfx.config.max_cu_per_sh = 2;
1821                adev->gfx.config.max_sh_per_se = 1;
1822                adev->gfx.config.max_backends_per_se = 2;
1823                adev->gfx.config.max_texture_channel_caches = 4;
1824                adev->gfx.config.max_gprs = 256;
1825                adev->gfx.config.max_gs_threads = 32;
1826                adev->gfx.config.max_hw_contexts = 8;
1827
1828                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1833                break;
1834        }
1835
1836        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1837        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1838
1839        adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1840                                MC_ARB_RAMCFG, NOOFBANK);
1841        adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1842                                MC_ARB_RAMCFG, NOOFRANKS);
1843
1844        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1845        adev->gfx.config.mem_max_burst_length_bytes = 256;
1846        if (adev->flags & AMD_IS_APU) {
1847                /* Get memory bank mapping mode. */
1848                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1849                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1850                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1851
1852                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1853                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1854                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1855
1856                /* Validate settings in case only one DIMM installed. */
1857                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1858                        dimm00_addr_map = 0;
1859                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1860                        dimm01_addr_map = 0;
1861                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1862                        dimm10_addr_map = 0;
1863                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1864                        dimm11_addr_map = 0;
1865
1866                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1867                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1868                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1869                        adev->gfx.config.mem_row_size_in_kb = 2;
1870                else
1871                        adev->gfx.config.mem_row_size_in_kb = 1;
1872        } else {
1873                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1874                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1875                if (adev->gfx.config.mem_row_size_in_kb > 4)
1876                        adev->gfx.config.mem_row_size_in_kb = 4;
1877        }
1878
1879        adev->gfx.config.shader_engine_tile_size = 32;
1880        adev->gfx.config.num_gpus = 1;
1881        adev->gfx.config.multi_gpu_tile_size = 64;
1882
1883        /* fix up row size */
1884        switch (adev->gfx.config.mem_row_size_in_kb) {
1885        case 1:
1886        default:
1887                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1888                break;
1889        case 2:
1890                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1891                break;
1892        case 4:
1893                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1894                break;
1895        }
1896        adev->gfx.config.gb_addr_config = gb_addr_config;
1897
1898        return 0;
1899}
1900
1901static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1902                                        int mec, int pipe, int queue)
1903{
1904        int r;
1905        unsigned irq_type;
1906        struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1907        unsigned int hw_prio;
1908
1909        ring = &adev->gfx.compute_ring[ring_id];
1910
1911        /* mec0 is me1 */
1912        ring->me = mec + 1;
1913        ring->pipe = pipe;
1914        ring->queue = queue;
1915
1916        ring->ring_obj = NULL;
1917        ring->use_doorbell = true;
1918        ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1919        ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1920                                + (ring_id * GFX8_MEC_HPD_SIZE);
1921        sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1922
1923        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1924                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1925                + ring->pipe;
1926
1927        hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1928                        AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1929        /* type-2 packets are deprecated on MEC, use type-3 instead */
1930        r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1931                             hw_prio, NULL);
1932        if (r)
1933                return r;
1934
1935
1936        return 0;
1937}
1938
1939static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1940
1941static int gfx_v8_0_sw_init(void *handle)
1942{
1943        int i, j, k, r, ring_id;
1944        struct amdgpu_ring *ring;
1945        struct amdgpu_kiq *kiq;
1946        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1947
1948        switch (adev->asic_type) {
1949        case CHIP_TONGA:
1950        case CHIP_CARRIZO:
1951        case CHIP_FIJI:
1952        case CHIP_POLARIS10:
1953        case CHIP_POLARIS11:
1954        case CHIP_POLARIS12:
1955        case CHIP_VEGAM:
1956                adev->gfx.mec.num_mec = 2;
1957                break;
1958        case CHIP_TOPAZ:
1959        case CHIP_STONEY:
1960        default:
1961                adev->gfx.mec.num_mec = 1;
1962                break;
1963        }
1964
1965        adev->gfx.mec.num_pipe_per_mec = 4;
1966        adev->gfx.mec.num_queue_per_pipe = 8;
1967
1968        /* EOP Event */
1969        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1970        if (r)
1971                return r;
1972
1973        /* Privileged reg */
1974        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1975                              &adev->gfx.priv_reg_irq);
1976        if (r)
1977                return r;
1978
1979        /* Privileged inst */
1980        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1981                              &adev->gfx.priv_inst_irq);
1982        if (r)
1983                return r;
1984
1985        /* Add CP EDC/ECC irq  */
1986        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1987                              &adev->gfx.cp_ecc_error_irq);
1988        if (r)
1989                return r;
1990
1991        /* SQ interrupts. */
1992        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1993                              &adev->gfx.sq_irq);
1994        if (r) {
1995                DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1996                return r;
1997        }
1998
1999        INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2000
2001        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2002
2003        gfx_v8_0_scratch_init(adev);
2004
2005        r = gfx_v8_0_init_microcode(adev);
2006        if (r) {
2007                DRM_ERROR("Failed to load gfx firmware!\n");
2008                return r;
2009        }
2010
2011        r = adev->gfx.rlc.funcs->init(adev);
2012        if (r) {
2013                DRM_ERROR("Failed to init rlc BOs!\n");
2014                return r;
2015        }
2016
2017        r = gfx_v8_0_mec_init(adev);
2018        if (r) {
2019                DRM_ERROR("Failed to init MEC BOs!\n");
2020                return r;
2021        }
2022
2023        /* set up the gfx ring */
2024        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2025                ring = &adev->gfx.gfx_ring[i];
2026                ring->ring_obj = NULL;
2027                sprintf(ring->name, "gfx");
2028                /* no gfx doorbells on iceland */
2029                if (adev->asic_type != CHIP_TOPAZ) {
2030                        ring->use_doorbell = true;
2031                        ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2032                }
2033
2034                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2035                                     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2036                                     AMDGPU_RING_PRIO_DEFAULT, NULL);
2037                if (r)
2038                        return r;
2039        }
2040
2041
2042        /* set up the compute queues - allocate horizontally across pipes */
2043        ring_id = 0;
2044        for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2045                for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2046                        for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2047                                if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2048                                        continue;
2049
2050                                r = gfx_v8_0_compute_ring_init(adev,
2051                                                                ring_id,
2052                                                                i, k, j);
2053                                if (r)
2054                                        return r;
2055
2056                                ring_id++;
2057                        }
2058                }
2059        }
2060
2061        r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2062        if (r) {
2063                DRM_ERROR("Failed to init KIQ BOs!\n");
2064                return r;
2065        }
2066
2067        kiq = &adev->gfx.kiq;
2068        r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2069        if (r)
2070                return r;
2071
2072        /* create MQD for all compute queues as well as KIQ for SRIOV case */
2073        r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2074        if (r)
2075                return r;
2076
2077        adev->gfx.ce_ram_size = 0x8000;
2078
2079        r = gfx_v8_0_gpu_early_init(adev);
2080        if (r)
2081                return r;
2082
2083        return 0;
2084}
2085
2086static int gfx_v8_0_sw_fini(void *handle)
2087{
2088        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2089        int i;
2090
2091        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2092                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2093        for (i = 0; i < adev->gfx.num_compute_rings; i++)
2094                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2095
2096        amdgpu_gfx_mqd_sw_fini(adev);
2097        amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2098        amdgpu_gfx_kiq_fini(adev);
2099
2100        gfx_v8_0_mec_fini(adev);
2101        amdgpu_gfx_rlc_fini(adev);
2102        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2103                                &adev->gfx.rlc.clear_state_gpu_addr,
2104                                (void **)&adev->gfx.rlc.cs_ptr);
2105        if ((adev->asic_type == CHIP_CARRIZO) ||
2106            (adev->asic_type == CHIP_STONEY)) {
2107                amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2108                                &adev->gfx.rlc.cp_table_gpu_addr,
2109                                (void **)&adev->gfx.rlc.cp_table_ptr);
2110        }
2111        gfx_v8_0_free_microcode(adev);
2112
2113        return 0;
2114}
2115
2116static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2117{
2118        uint32_t *modearray, *mod2array;
2119        const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2120        const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2121        u32 reg_offset;
2122
2123        modearray = adev->gfx.config.tile_mode_array;
2124        mod2array = adev->gfx.config.macrotile_mode_array;
2125
2126        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2127                modearray[reg_offset] = 0;
2128
2129        for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2130                mod2array[reg_offset] = 0;
2131
2132        switch (adev->asic_type) {
2133        case CHIP_TOPAZ:
2134                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                PIPE_CONFIG(ADDR_SURF_P2) |
2136                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2137                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139                                PIPE_CONFIG(ADDR_SURF_P2) |
2140                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2141                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2143                                PIPE_CONFIG(ADDR_SURF_P2) |
2144                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2145                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2147                                PIPE_CONFIG(ADDR_SURF_P2) |
2148                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2149                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2150                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                PIPE_CONFIG(ADDR_SURF_P2) |
2152                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2153                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2155                                PIPE_CONFIG(ADDR_SURF_P2) |
2156                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2157                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159                                PIPE_CONFIG(ADDR_SURF_P2) |
2160                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2161                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2163                                PIPE_CONFIG(ADDR_SURF_P2));
2164                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165                                PIPE_CONFIG(ADDR_SURF_P2) |
2166                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2171                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2175                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2177                                 PIPE_CONFIG(ADDR_SURF_P2) |
2178                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2) |
2182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2185                                 PIPE_CONFIG(ADDR_SURF_P2) |
2186                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2187                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189                                 PIPE_CONFIG(ADDR_SURF_P2) |
2190                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2193                                 PIPE_CONFIG(ADDR_SURF_P2) |
2194                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2197                                 PIPE_CONFIG(ADDR_SURF_P2) |
2198                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2199                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2201                                 PIPE_CONFIG(ADDR_SURF_P2) |
2202                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2205                                 PIPE_CONFIG(ADDR_SURF_P2) |
2206                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2209                                 PIPE_CONFIG(ADDR_SURF_P2) |
2210                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2211                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2213                                 PIPE_CONFIG(ADDR_SURF_P2) |
2214                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2215                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2217                                 PIPE_CONFIG(ADDR_SURF_P2) |
2218                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2221                                 PIPE_CONFIG(ADDR_SURF_P2) |
2222                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225                                 PIPE_CONFIG(ADDR_SURF_P2) |
2226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2227                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229                                 PIPE_CONFIG(ADDR_SURF_P2) |
2230                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2231                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2232                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P2) |
2234                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2235                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2236
2237                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2238                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                NUM_BANKS(ADDR_SURF_8_BANK));
2241                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2242                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244                                NUM_BANKS(ADDR_SURF_8_BANK));
2245                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2246                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248                                NUM_BANKS(ADDR_SURF_8_BANK));
2249                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                NUM_BANKS(ADDR_SURF_8_BANK));
2253                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2255                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256                                NUM_BANKS(ADDR_SURF_8_BANK));
2257                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2259                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260                                NUM_BANKS(ADDR_SURF_8_BANK));
2261                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                NUM_BANKS(ADDR_SURF_8_BANK));
2265                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2266                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2267                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                NUM_BANKS(ADDR_SURF_16_BANK));
2269                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2270                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2271                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272                                NUM_BANKS(ADDR_SURF_16_BANK));
2273                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276                                 NUM_BANKS(ADDR_SURF_16_BANK));
2277                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2278                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2280                                 NUM_BANKS(ADDR_SURF_16_BANK));
2281                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2283                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284                                 NUM_BANKS(ADDR_SURF_16_BANK));
2285                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2287                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                 NUM_BANKS(ADDR_SURF_16_BANK));
2289                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2291                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2292                                 NUM_BANKS(ADDR_SURF_8_BANK));
2293
2294                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2295                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2296                            reg_offset != 23)
2297                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2298
2299                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2300                        if (reg_offset != 7)
2301                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2302
2303                break;
2304        case CHIP_FIJI:
2305        case CHIP_VEGAM:
2306                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2309                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2313                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2317                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2321                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2325                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2329                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2333                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2336                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2339                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2340                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2343                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2347                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2357                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2365                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2377                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2381                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2389                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2393                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2397                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2398                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2401                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2405                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2409                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428
2429                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432                                NUM_BANKS(ADDR_SURF_8_BANK));
2433                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                NUM_BANKS(ADDR_SURF_8_BANK));
2437                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                NUM_BANKS(ADDR_SURF_8_BANK));
2441                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                NUM_BANKS(ADDR_SURF_8_BANK));
2445                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2447                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448                                NUM_BANKS(ADDR_SURF_8_BANK));
2449                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                                NUM_BANKS(ADDR_SURF_8_BANK));
2453                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                NUM_BANKS(ADDR_SURF_8_BANK));
2457                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2459                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                                NUM_BANKS(ADDR_SURF_8_BANK));
2461                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                                NUM_BANKS(ADDR_SURF_8_BANK));
2465                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                 NUM_BANKS(ADDR_SURF_8_BANK));
2469                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                                 NUM_BANKS(ADDR_SURF_8_BANK));
2473                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2475                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                                 NUM_BANKS(ADDR_SURF_8_BANK));
2477                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                                 NUM_BANKS(ADDR_SURF_8_BANK));
2481                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                                 NUM_BANKS(ADDR_SURF_4_BANK));
2485
2486                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2487                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2488
2489                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2490                        if (reg_offset != 7)
2491                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2492
2493                break;
2494        case CHIP_TONGA:
2495                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2498                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2502                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2506                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2510                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2514                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2516                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2518                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2522                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2524                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2525                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2528                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2529                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2532                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2536                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2540                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2554                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2566                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2570                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2578                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2582                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2586                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2590                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2592                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2594                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2598                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2608                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617
2618                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621                                NUM_BANKS(ADDR_SURF_16_BANK));
2622                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                NUM_BANKS(ADDR_SURF_16_BANK));
2626                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                NUM_BANKS(ADDR_SURF_16_BANK));
2630                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                NUM_BANKS(ADDR_SURF_16_BANK));
2634                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2636                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2637                                NUM_BANKS(ADDR_SURF_16_BANK));
2638                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641                                NUM_BANKS(ADDR_SURF_16_BANK));
2642                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645                                NUM_BANKS(ADDR_SURF_16_BANK));
2646                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2648                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2649                                NUM_BANKS(ADDR_SURF_16_BANK));
2650                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2653                                NUM_BANKS(ADDR_SURF_16_BANK));
2654                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2656                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657                                 NUM_BANKS(ADDR_SURF_16_BANK));
2658                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2661                                 NUM_BANKS(ADDR_SURF_16_BANK));
2662                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665                                 NUM_BANKS(ADDR_SURF_8_BANK));
2666                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2669                                 NUM_BANKS(ADDR_SURF_4_BANK));
2670                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2673                                 NUM_BANKS(ADDR_SURF_4_BANK));
2674
2675                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2676                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2677
2678                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2679                        if (reg_offset != 7)
2680                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2681
2682                break;
2683        case CHIP_POLARIS11:
2684        case CHIP_POLARIS12:
2685                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2688                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2692                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2700                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2704                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2708                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2718                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
2719                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2744                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2756                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2760                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2768                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2772                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2776                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2782                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2784                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2788                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2794                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2798                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807
2808                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816                                NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2820                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2821                                NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2825                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2826                                NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831                                NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841                                NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2844                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2845                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851                                NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2860                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861                                NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2866                                NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2871                                NUM_BANKS(ADDR_SURF_8_BANK));
2872
2873                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2875                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2876                                NUM_BANKS(ADDR_SURF_4_BANK));
2877
2878                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2879                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2880
2881                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2882                        if (reg_offset != 7)
2883                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2884
2885                break;
2886        case CHIP_POLARIS10:
2887                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2890                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2894                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2898                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2902                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2921                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933                modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2946                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953                modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2958                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2962                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2970                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2974                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2978                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2979                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2986                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2990                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002                                PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005                modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009
3010                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013                                NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023                                NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028                                NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033                                NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3038                                NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043                                NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3047                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048                                NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053                                NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3057                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058                                NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063                                NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068                                NUM_BANKS(ADDR_SURF_8_BANK));
3069
3070                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073                                NUM_BANKS(ADDR_SURF_4_BANK));
3074
3075                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3078                                NUM_BANKS(ADDR_SURF_4_BANK));
3079
3080                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3081                        WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3082
3083                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3084                        if (reg_offset != 7)
3085                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3086
3087                break;
3088        case CHIP_STONEY:
3089                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                PIPE_CONFIG(ADDR_SURF_P2) |
3091                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3092                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094                                PIPE_CONFIG(ADDR_SURF_P2) |
3095                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3096                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098                                PIPE_CONFIG(ADDR_SURF_P2) |
3099                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3100                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102                                PIPE_CONFIG(ADDR_SURF_P2) |
3103                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3104                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                PIPE_CONFIG(ADDR_SURF_P2) |
3107                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110                                PIPE_CONFIG(ADDR_SURF_P2) |
3111                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3112                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114                                PIPE_CONFIG(ADDR_SURF_P2) |
3115                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3118                                PIPE_CONFIG(ADDR_SURF_P2));
3119                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3120                                PIPE_CONFIG(ADDR_SURF_P2) |
3121                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124                                 PIPE_CONFIG(ADDR_SURF_P2) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3126                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3132                                 PIPE_CONFIG(ADDR_SURF_P2) |
3133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136                                 PIPE_CONFIG(ADDR_SURF_P2) |
3137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3140                                 PIPE_CONFIG(ADDR_SURF_P2) |
3141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144                                 PIPE_CONFIG(ADDR_SURF_P2) |
3145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3148                                 PIPE_CONFIG(ADDR_SURF_P2) |
3149                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3152                                 PIPE_CONFIG(ADDR_SURF_P2) |
3153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3156                                 PIPE_CONFIG(ADDR_SURF_P2) |
3157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3160                                 PIPE_CONFIG(ADDR_SURF_P2) |
3161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3164                                 PIPE_CONFIG(ADDR_SURF_P2) |
3165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3168                                 PIPE_CONFIG(ADDR_SURF_P2) |
3169                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3172                                 PIPE_CONFIG(ADDR_SURF_P2) |
3173                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3176                                 PIPE_CONFIG(ADDR_SURF_P2) |
3177                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180                                 PIPE_CONFIG(ADDR_SURF_P2) |
3181                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3184                                 PIPE_CONFIG(ADDR_SURF_P2) |
3185                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3186                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188                                 PIPE_CONFIG(ADDR_SURF_P2) |
3189                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191
3192                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195                                NUM_BANKS(ADDR_SURF_8_BANK));
3196                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3198                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                NUM_BANKS(ADDR_SURF_8_BANK));
3200                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                NUM_BANKS(ADDR_SURF_8_BANK));
3204                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207                                NUM_BANKS(ADDR_SURF_8_BANK));
3208                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211                                NUM_BANKS(ADDR_SURF_8_BANK));
3212                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215                                NUM_BANKS(ADDR_SURF_8_BANK));
3216                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219                                NUM_BANKS(ADDR_SURF_8_BANK));
3220                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3221                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3222                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                NUM_BANKS(ADDR_SURF_16_BANK));
3224                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3225                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3226                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                NUM_BANKS(ADDR_SURF_16_BANK));
3228                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                 NUM_BANKS(ADDR_SURF_16_BANK));
3232                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                 NUM_BANKS(ADDR_SURF_16_BANK));
3236                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239                                 NUM_BANKS(ADDR_SURF_16_BANK));
3240                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                 NUM_BANKS(ADDR_SURF_16_BANK));
3244                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247                                 NUM_BANKS(ADDR_SURF_8_BANK));
3248
3249                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3250                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251                            reg_offset != 23)
3252                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253
3254                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3255                        if (reg_offset != 7)
3256                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3257
3258                break;
3259        default:
3260                dev_warn(adev->dev,
3261                         "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3262                         adev->asic_type);
3263                fallthrough;
3264
3265        case CHIP_CARRIZO:
3266                modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                                PIPE_CONFIG(ADDR_SURF_P2) |
3268                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3269                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271                                PIPE_CONFIG(ADDR_SURF_P2) |
3272                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3273                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274                modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275                                PIPE_CONFIG(ADDR_SURF_P2) |
3276                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3277                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278                modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279                                PIPE_CONFIG(ADDR_SURF_P2) |
3280                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3281                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282                modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                PIPE_CONFIG(ADDR_SURF_P2) |
3284                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3285                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3287                                PIPE_CONFIG(ADDR_SURF_P2) |
3288                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3289                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3291                                PIPE_CONFIG(ADDR_SURF_P2) |
3292                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3293                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3295                                PIPE_CONFIG(ADDR_SURF_P2));
3296                modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297                                PIPE_CONFIG(ADDR_SURF_P2) |
3298                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3299                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300                modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3301                                 PIPE_CONFIG(ADDR_SURF_P2) |
3302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3303                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304                modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305                                 PIPE_CONFIG(ADDR_SURF_P2) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3307                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308                modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309                                 PIPE_CONFIG(ADDR_SURF_P2) |
3310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312                modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313                                 PIPE_CONFIG(ADDR_SURF_P2) |
3314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3315                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316                modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3317                                 PIPE_CONFIG(ADDR_SURF_P2) |
3318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3319                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3320                modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3321                                 PIPE_CONFIG(ADDR_SURF_P2) |
3322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3324                modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3325                                 PIPE_CONFIG(ADDR_SURF_P2) |
3326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3329                                 PIPE_CONFIG(ADDR_SURF_P2) |
3330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3331                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3333                                 PIPE_CONFIG(ADDR_SURF_P2) |
3334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336                modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3337                                 PIPE_CONFIG(ADDR_SURF_P2) |
3338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3341                                 PIPE_CONFIG(ADDR_SURF_P2) |
3342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345                                 PIPE_CONFIG(ADDR_SURF_P2) |
3346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3347                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3349                                 PIPE_CONFIG(ADDR_SURF_P2) |
3350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3353                                 PIPE_CONFIG(ADDR_SURF_P2) |
3354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3357                                 PIPE_CONFIG(ADDR_SURF_P2) |
3358                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3359                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3360                modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3361                                 PIPE_CONFIG(ADDR_SURF_P2) |
3362                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3363                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3364                modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3365                                 PIPE_CONFIG(ADDR_SURF_P2) |
3366                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3367                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3368
3369                mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3371                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3372                                NUM_BANKS(ADDR_SURF_8_BANK));
3373                mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3375                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376                                NUM_BANKS(ADDR_SURF_8_BANK));
3377                mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380                                NUM_BANKS(ADDR_SURF_8_BANK));
3381                mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3383                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3384                                NUM_BANKS(ADDR_SURF_8_BANK));
3385                mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3388                                NUM_BANKS(ADDR_SURF_8_BANK));
3389                mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392                                NUM_BANKS(ADDR_SURF_8_BANK));
3393                mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                NUM_BANKS(ADDR_SURF_8_BANK));
3397                mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3398                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3399                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400                                NUM_BANKS(ADDR_SURF_16_BANK));
3401                mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3402                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3403                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404                                NUM_BANKS(ADDR_SURF_16_BANK));
3405                mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3408                                 NUM_BANKS(ADDR_SURF_16_BANK));
3409                mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412                                 NUM_BANKS(ADDR_SURF_16_BANK));
3413                mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                 NUM_BANKS(ADDR_SURF_16_BANK));
3417                mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                 NUM_BANKS(ADDR_SURF_16_BANK));
3421                mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3424                                 NUM_BANKS(ADDR_SURF_8_BANK));
3425
3426                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3427                        if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3428                            reg_offset != 23)
3429                                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3430
3431                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3432                        if (reg_offset != 7)
3433                                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3434
3435                break;
3436        }
3437}
3438
3439static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3440                                  u32 se_num, u32 sh_num, u32 instance)
3441{
3442        u32 data;
3443
3444        if (instance == 0xffffffff)
3445                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3446        else
3447                data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3448
3449        if (se_num == 0xffffffff)
3450                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3451        else
3452                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3453
3454        if (sh_num == 0xffffffff)
3455                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3456        else
3457                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3458
3459        WREG32(mmGRBM_GFX_INDEX, data);
3460}
3461
3462static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3463                                  u32 me, u32 pipe, u32 q, u32 vm)
3464{
3465        vi_srbm_select(adev, me, pipe, q, vm);
3466}
3467
3468static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3469{
3470        u32 data, mask;
3471
3472        data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3473                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3474
3475        data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3476
3477        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3478                                         adev->gfx.config.max_sh_per_se);
3479
3480        return (~data) & mask;
3481}
3482
3483static void
3484gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3485{
3486        switch (adev->asic_type) {
3487        case CHIP_FIJI:
3488        case CHIP_VEGAM:
3489                *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3490                          RB_XSEL2(1) | PKR_MAP(2) |
3491                          PKR_XSEL(1) | PKR_YSEL(1) |
3492                          SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3493                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3494                           SE_PAIR_YSEL(2);
3495                break;
3496        case CHIP_TONGA:
3497        case CHIP_POLARIS10:
3498                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3499                          SE_XSEL(1) | SE_YSEL(1);
3500                *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3501                           SE_PAIR_YSEL(2);
3502                break;
3503        case CHIP_TOPAZ:
3504        case CHIP_CARRIZO:
3505                *rconf |= RB_MAP_PKR0(2);
3506                *rconf1 |= 0x0;
3507                break;
3508        case CHIP_POLARIS11:
3509        case CHIP_POLARIS12:
3510                *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3511                          SE_XSEL(1) | SE_YSEL(1);
3512                *rconf1 |= 0x0;
3513                break;
3514        case CHIP_STONEY:
3515                *rconf |= 0x0;
3516                *rconf1 |= 0x0;
3517                break;
3518        default:
3519                DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3520                break;
3521        }
3522}
3523
3524static void
3525gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3526                                        u32 raster_config, u32 raster_config_1,
3527                                        unsigned rb_mask, unsigned num_rb)
3528{
3529        unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3530        unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3531        unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3532        unsigned rb_per_se = num_rb / num_se;
3533        unsigned se_mask[4];
3534        unsigned se;
3535
3536        se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3537        se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3538        se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3539        se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3540
3541        WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3542        WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3543        WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3544
3545        if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3546                             (!se_mask[2] && !se_mask[3]))) {
3547                raster_config_1 &= ~SE_PAIR_MAP_MASK;
3548
3549                if (!se_mask[0] && !se_mask[1]) {
3550                        raster_config_1 |=
3551                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3552                } else {
3553                        raster_config_1 |=
3554                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3555                }
3556        }
3557
3558        for (se = 0; se < num_se; se++) {
3559                unsigned raster_config_se = raster_config;
3560                unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3561                unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3562                int idx = (se / 2) * 2;
3563
3564                if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3565                        raster_config_se &= ~SE_MAP_MASK;
3566
3567                        if (!se_mask[idx]) {
3568                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3569                        } else {
3570                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3571                        }
3572                }
3573
3574                pkr0_mask &= rb_mask;
3575                pkr1_mask &= rb_mask;
3576                if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3577                        raster_config_se &= ~PKR_MAP_MASK;
3578
3579                        if (!pkr0_mask) {
3580                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3581                        } else {
3582                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3583                        }
3584                }
3585
3586                if (rb_per_se >= 2) {
3587                        unsigned rb0_mask = 1 << (se * rb_per_se);
3588                        unsigned rb1_mask = rb0_mask << 1;
3589
3590                        rb0_mask &= rb_mask;
3591                        rb1_mask &= rb_mask;
3592                        if (!rb0_mask || !rb1_mask) {
3593                                raster_config_se &= ~RB_MAP_PKR0_MASK;
3594
3595                                if (!rb0_mask) {
3596                                        raster_config_se |=
3597                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3598                                } else {
3599                                        raster_config_se |=
3600                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3601                                }
3602                        }
3603
3604                        if (rb_per_se > 2) {
3605                                rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3606                                rb1_mask = rb0_mask << 1;
3607                                rb0_mask &= rb_mask;
3608                                rb1_mask &= rb_mask;
3609                                if (!rb0_mask || !rb1_mask) {
3610                                        raster_config_se &= ~RB_MAP_PKR1_MASK;
3611
3612                                        if (!rb0_mask) {
3613                                                raster_config_se |=
3614                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3615                                        } else {
3616                                                raster_config_se |=
3617                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3618                                        }
3619                                }
3620                        }
3621                }
3622
3623                /* GRBM_GFX_INDEX has a different offset on VI */
3624                gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3625                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3626                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3627        }
3628
3629        /* GRBM_GFX_INDEX has a different offset on VI */
3630        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3631}
3632
3633static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3634{
3635        int i, j;
3636        u32 data;
3637        u32 raster_config = 0, raster_config_1 = 0;
3638        u32 active_rbs = 0;
3639        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3640                                        adev->gfx.config.max_sh_per_se;
3641        unsigned num_rb_pipes;
3642
3643        mutex_lock(&adev->grbm_idx_mutex);
3644        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3645                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3646                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3647                        data = gfx_v8_0_get_rb_active_bitmap(adev);
3648                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3649                                               rb_bitmap_width_per_sh);
3650                }
3651        }
3652        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3653
3654        adev->gfx.config.backend_enable_mask = active_rbs;
3655        adev->gfx.config.num_rbs = hweight32(active_rbs);
3656
3657        num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3658                             adev->gfx.config.max_shader_engines, 16);
3659
3660        gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3661
3662        if (!adev->gfx.config.backend_enable_mask ||
3663                        adev->gfx.config.num_rbs >= num_rb_pipes) {
3664                WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3665                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3666        } else {
3667                gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3668                                                        adev->gfx.config.backend_enable_mask,
3669                                                        num_rb_pipes);
3670        }
3671
3672        /* cache the values for userspace */
3673        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3674                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3675                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3676                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
3677                                RREG32(mmCC_RB_BACKEND_DISABLE);
3678                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3679                                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3680                        adev->gfx.config.rb_config[i][j].raster_config =
3681                                RREG32(mmPA_SC_RASTER_CONFIG);
3682                        adev->gfx.config.rb_config[i][j].raster_config_1 =
3683                                RREG32(mmPA_SC_RASTER_CONFIG_1);
3684                }
3685        }
3686        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3687        mutex_unlock(&adev->grbm_idx_mutex);
3688}
3689
3690#define DEFAULT_SH_MEM_BASES    (0x6000)
3691/**
3692 * gfx_v8_0_init_compute_vmid - gart enable
3693 *
3694 * @adev: amdgpu_device pointer
3695 *
3696 * Initialize compute vmid sh_mem registers
3697 *
3698 */
3699static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3700{
3701        int i;
3702        uint32_t sh_mem_config;
3703        uint32_t sh_mem_bases;
3704
3705        /*
3706         * Configure apertures:
3707         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3708         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3709         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3710         */
3711        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3712
3713        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3714                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3715                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3716                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3717                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3718                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3719
3720        mutex_lock(&adev->srbm_mutex);
3721        for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3722                vi_srbm_select(adev, 0, 0, 0, i);
3723                /* CP and shaders */
3724                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3725                WREG32(mmSH_MEM_APE1_BASE, 1);
3726                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3727                WREG32(mmSH_MEM_BASES, sh_mem_bases);
3728        }
3729        vi_srbm_select(adev, 0, 0, 0, 0);
3730        mutex_unlock(&adev->srbm_mutex);
3731
3732        /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3733           acccess. These should be enabled by FW for target VMIDs. */
3734        for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3735                WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3736                WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3737                WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3738                WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3739        }
3740}
3741
3742static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3743{
3744        int vmid;
3745
3746        /*
3747         * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3748         * access. Compute VMIDs should be enabled by FW for target VMIDs,
3749         * the driver can enable them for graphics. VMID0 should maintain
3750         * access so that HWS firmware can save/restore entries.
3751         */
3752        for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3753                WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3754                WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3755                WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3756                WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3757        }
3758}
3759
3760static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3761{
3762        switch (adev->asic_type) {
3763        default:
3764                adev->gfx.config.double_offchip_lds_buf = 1;
3765                break;
3766        case CHIP_CARRIZO:
3767        case CHIP_STONEY:
3768                adev->gfx.config.double_offchip_lds_buf = 0;
3769                break;
3770        }
3771}
3772
3773static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3774{
3775        u32 tmp, sh_static_mem_cfg;
3776        int i;
3777
3778        WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3779        WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3780        WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3781        WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3782
3783        gfx_v8_0_tiling_mode_table_init(adev);
3784        gfx_v8_0_setup_rb(adev);
3785        gfx_v8_0_get_cu_info(adev);
3786        gfx_v8_0_config_init(adev);
3787
3788        /* XXX SH_MEM regs */
3789        /* where to put LDS, scratch, GPUVM in FSA64 space */
3790        sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3791                                   SWIZZLE_ENABLE, 1);
3792        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3793                                   ELEMENT_SIZE, 1);
3794        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3795                                   INDEX_STRIDE, 3);
3796        WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3797
3798        mutex_lock(&adev->srbm_mutex);
3799        for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3800                vi_srbm_select(adev, 0, 0, 0, i);
3801                /* CP and shaders */
3802                if (i == 0) {
3803                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3804                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3805                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3806                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3807                        WREG32(mmSH_MEM_CONFIG, tmp);
3808                        WREG32(mmSH_MEM_BASES, 0);
3809                } else {
3810                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3811                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3812                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3813                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3814                        WREG32(mmSH_MEM_CONFIG, tmp);
3815                        tmp = adev->gmc.shared_aperture_start >> 48;
3816                        WREG32(mmSH_MEM_BASES, tmp);
3817                }
3818
3819                WREG32(mmSH_MEM_APE1_BASE, 1);
3820                WREG32(mmSH_MEM_APE1_LIMIT, 0);
3821        }
3822        vi_srbm_select(adev, 0, 0, 0, 0);
3823        mutex_unlock(&adev->srbm_mutex);
3824
3825        gfx_v8_0_init_compute_vmid(adev);
3826        gfx_v8_0_init_gds_vmid(adev);
3827
3828        mutex_lock(&adev->grbm_idx_mutex);
3829        /*
3830         * making sure that the following register writes will be broadcasted
3831         * to all the shaders
3832         */
3833        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3834
3835        WREG32(mmPA_SC_FIFO_SIZE,
3836                   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3837                        PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3838                   (adev->gfx.config.sc_prim_fifo_size_backend <<
3839                        PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3840                   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3841                        PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3842                   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3843                        PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3844
3845        tmp = RREG32(mmSPI_ARB_PRIORITY);
3846        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3847        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3848        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3849        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3850        WREG32(mmSPI_ARB_PRIORITY, tmp);
3851
3852        mutex_unlock(&adev->grbm_idx_mutex);
3853
3854}
3855
3856static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3857{
3858        u32 i, j, k;
3859        u32 mask;
3860
3861        mutex_lock(&adev->grbm_idx_mutex);
3862        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3863                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3864                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3865                        for (k = 0; k < adev->usec_timeout; k++) {
3866                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3867                                        break;
3868                                udelay(1);
3869                        }
3870                        if (k == adev->usec_timeout) {
3871                                gfx_v8_0_select_se_sh(adev, 0xffffffff,
3872                                                      0xffffffff, 0xffffffff);
3873                                mutex_unlock(&adev->grbm_idx_mutex);
3874                                DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3875                                         i, j);
3876                                return;
3877                        }
3878                }
3879        }
3880        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3881        mutex_unlock(&adev->grbm_idx_mutex);
3882
3883        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3884                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3885                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3886                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3887        for (k = 0; k < adev->usec_timeout; k++) {
3888                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3889                        break;
3890                udelay(1);
3891        }
3892}
3893
3894static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3895                                               bool enable)
3896{
3897        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3898
3899        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3900        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3901        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3902        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3903
3904        WREG32(mmCP_INT_CNTL_RING0, tmp);
3905}
3906
3907static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3908{
3909        adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3910        /* csib */
3911        WREG32(mmRLC_CSIB_ADDR_HI,
3912                        adev->gfx.rlc.clear_state_gpu_addr >> 32);
3913        WREG32(mmRLC_CSIB_ADDR_LO,
3914                        adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3915        WREG32(mmRLC_CSIB_LENGTH,
3916                        adev->gfx.rlc.clear_state_size);
3917}
3918
/**
 * gfx_v8_0_parse_ind_reg_list - index the indirect register list in place
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: element offset at which parsing starts
 * @list_size: number of elements in @register_list_format
 * @unique_indices: table collecting each distinct index value seen
 * @indices_count: in/out number of used entries in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: table collecting the start offset of each sub-list
 * @offset_count: in/out number of used entries in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is a sequence of sub-lists, each terminated by 0xFFFFFFFF.
 * Inside a sub-list entries are three elements wide; the third element of
 * each entry is looked up in (or appended to) @unique_indices and replaced
 * by its position in that table.
 *
 * Both tables are checked with BUG_ON() right after an entry is appended,
 * so each can hold at most capacity - 1 entries.
 *
 * If the list ends in the middle of an entry, parsing stops with a
 * warning instead of accessing elements beyond @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
                                int ind_offset,
                                int list_size,
                                int *unique_indices,
                                int *indices_count,
                                int max_indices,
                                int *ind_start_offsets,
                                int *offset_count,
                                int max_offset)
{
        int indices;
        bool new_entry = true;

        for (; ind_offset < list_size; ind_offset++) {

                if (new_entry) {
                        /* first element of a sub-list: remember where it starts */
                        new_entry = false;
                        ind_start_offsets[*offset_count] = ind_offset;
                        *offset_count = *offset_count + 1;
                        BUG_ON(*offset_count >= max_offset);
                }

                if (register_list_format[ind_offset] == 0xFFFFFFFF) {
                        /* terminator: the next element opens a new sub-list */
                        new_entry = true;
                        continue;
                }

                /* skip to the third element of this entry */
                ind_offset += 2;

                /* truncated entry: never index past the end of the list */
                if (WARN_ON(ind_offset >= list_size))
                        break;

                /* look for the matching indice */
                for (indices = 0;
                        indices < *indices_count;
                        indices++) {
                        if (unique_indices[indices] ==
                                register_list_format[ind_offset])
                                break;
                }

                if (indices >= *indices_count) {
                        /* not seen before: append it to the table */
                        unique_indices[*indices_count] =
                                register_list_format[ind_offset];
                        indices = *indices_count;
                        *indices_count = *indices_count + 1;
                        BUG_ON(*indices_count >= max_indices);
                }

                /* replace the raw value by its position in the table */
                register_list_format[ind_offset] = indices;
        }
}
3968
3969static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3970{
3971        int i, temp, data;
3972        int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3973        int indices_count = 0;
3974        int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3975        int offset_count = 0;
3976
3977        int list_size;
3978        unsigned int *register_list_format =
3979                kmemdup(adev->gfx.rlc.register_list_format,
3980                        adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3981        if (!register_list_format)
3982                return -ENOMEM;
3983
3984        gfx_v8_0_parse_ind_reg_list(register_list_format,
3985                                RLC_FormatDirectRegListLength,
3986                                adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3987                                unique_indices,
3988                                &indices_count,
3989                                ARRAY_SIZE(unique_indices),
3990                                indirect_start_offsets,
3991                                &offset_count,
3992                                ARRAY_SIZE(indirect_start_offsets));
3993
3994        /* save and restore list */
3995        WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3996
3997        WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3998        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3999                WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4000
4001        /* indirect list */
4002        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4003        for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4004                WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4005
4006        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4007        list_size = list_size >> 1;
4008        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4009        WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4010
4011        /* starting offsets starts */
4012        WREG32(mmRLC_GPM_SCRATCH_ADDR,
4013                adev->gfx.rlc.starting_offsets_start);
4014        for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4015                WREG32(mmRLC_GPM_SCRATCH_DATA,
4016                                indirect_start_offsets[i]);
4017
4018        /* unique indices */
4019        temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4020        data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4021        for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4022                if (unique_indices[i] != 0) {
4023                        WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4024                        WREG32(data + i, unique_indices[i] >> 20);
4025                }
4026        }
4027        kfree(register_list_format);
4028
4029        return 0;
4030}
4031
4032static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4033{
4034        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4035}
4036
4037static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4038{
4039        uint32_t data;
4040
4041        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4042
4043        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4044        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4045        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4046        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4047        WREG32(mmRLC_PG_DELAY, data);
4048
4049        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4050        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4051
4052}
4053
4054static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4055                                                bool enable)
4056{
4057        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4058}
4059
4060static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4061                                                  bool enable)
4062{
4063        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4064}
4065
4066static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4067{
4068        WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4069}
4070
4071static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4072{
4073        if ((adev->asic_type == CHIP_CARRIZO) ||
4074            (adev->asic_type == CHIP_STONEY)) {
4075                gfx_v8_0_init_csb(adev);
4076                gfx_v8_0_init_save_restore_list(adev);
4077                gfx_v8_0_enable_save_restore_machine(adev);
4078                WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4079                gfx_v8_0_init_power_gating(adev);
4080                WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4081        } else if ((adev->asic_type == CHIP_POLARIS11) ||
4082                   (adev->asic_type == CHIP_POLARIS12) ||
4083                   (adev->asic_type == CHIP_VEGAM)) {
4084                gfx_v8_0_init_csb(adev);
4085                gfx_v8_0_init_save_restore_list(adev);
4086                gfx_v8_0_enable_save_restore_machine(adev);
4087                gfx_v8_0_init_power_gating(adev);
4088        }
4089
4090}
4091
4092static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4093{
4094        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4095
4096        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4097        gfx_v8_0_wait_for_rlc_serdes(adev);
4098}
4099
4100static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4101{
4102        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4103        udelay(50);
4104
4105        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4106        udelay(50);
4107}
4108
4109static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4110{
4111        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4112
4113        /* carrizo do enable cp interrupt after cp inited */
4114        if (!(adev->flags & AMD_IS_APU))
4115                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4116
4117        udelay(50);
4118}
4119
4120static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4121{
4122        if (amdgpu_sriov_vf(adev)) {
4123                gfx_v8_0_init_csb(adev);
4124                return 0;
4125        }
4126
4127        adev->gfx.rlc.funcs->stop(adev);
4128        adev->gfx.rlc.funcs->reset(adev);
4129        gfx_v8_0_init_pg(adev);
4130        adev->gfx.rlc.funcs->start(adev);
4131
4132        return 0;
4133}
4134
4135static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4136{
4137        u32 tmp = RREG32(mmCP_ME_CNTL);
4138
4139        if (enable) {
4140                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4141                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4142                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4143        } else {
4144                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4145                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4146                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4147        }
4148        WREG32(mmCP_ME_CNTL, tmp);
4149        udelay(50);
4150}
4151
4152static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4153{
4154        u32 count = 0;
4155        const struct cs_section_def *sect = NULL;
4156        const struct cs_extent_def *ext = NULL;
4157
4158        /* begin clear state */
4159        count += 2;
4160        /* context control state */
4161        count += 3;
4162
4163        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4164                for (ext = sect->section; ext->extent != NULL; ++ext) {
4165                        if (sect->id == SECT_CONTEXT)
4166                                count += 2 + ext->reg_count;
4167                        else
4168                                return 0;
4169                }
4170        }
4171        /* pa_sc_raster_config/pa_sc_raster_config1 */
4172        count += 4;
4173        /* end clear state */
4174        count += 2;
4175        /* clear state */
4176        count += 2;
4177
4178        return count;
4179}
4180
4181static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4182{
4183        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4184        const struct cs_section_def *sect = NULL;
4185        const struct cs_extent_def *ext = NULL;
4186        int r, i;
4187
4188        /* init the CP */
4189        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4190        WREG32(mmCP_ENDIAN_SWAP, 0);
4191        WREG32(mmCP_DEVICE_ID, 1);
4192
4193        gfx_v8_0_cp_gfx_enable(adev, true);
4194
4195        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4196        if (r) {
4197                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4198                return r;
4199        }
4200
4201        /* clear state buffer */
4202        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4203        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4204
4205        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4206        amdgpu_ring_write(ring, 0x80000000);
4207        amdgpu_ring_write(ring, 0x80000000);
4208
4209        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4210                for (ext = sect->section; ext->extent != NULL; ++ext) {
4211                        if (sect->id == SECT_CONTEXT) {
4212                                amdgpu_ring_write(ring,
4213                                       PACKET3(PACKET3_SET_CONTEXT_REG,
4214                                               ext->reg_count));
4215                                amdgpu_ring_write(ring,
4216                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4217                                for (i = 0; i < ext->reg_count; i++)
4218                                        amdgpu_ring_write(ring, ext->extent[i]);
4219                        }
4220                }
4221        }
4222
4223        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4224        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4225        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4226        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4227
4228        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4229        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4230
4231        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4232        amdgpu_ring_write(ring, 0);
4233
4234        /* init the CE partitions */
4235        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4236        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4237        amdgpu_ring_write(ring, 0x8000);
4238        amdgpu_ring_write(ring, 0x8000);
4239
4240        amdgpu_ring_commit(ring);
4241
4242        return 0;
4243}
4244static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4245{
4246        u32 tmp;
4247        /* no gfx doorbells on iceland */
4248        if (adev->asic_type == CHIP_TOPAZ)
4249                return;
4250
4251        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4252
4253        if (ring->use_doorbell) {
4254                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4255                                DOORBELL_OFFSET, ring->doorbell_index);
4256                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4257                                                DOORBELL_HIT, 0);
4258                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4259                                            DOORBELL_EN, 1);
4260        } else {
4261                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4262        }
4263
4264        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4265
4266        if (adev->flags & AMD_IS_APU)
4267                return;
4268
4269        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4270                                        DOORBELL_RANGE_LOWER,
4271                                        adev->doorbell_index.gfx_ring0);
4272        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4273
4274        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4275                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4276}
4277
4278static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4279{
4280        struct amdgpu_ring *ring;
4281        u32 tmp;
4282        u32 rb_bufsz;
4283        u64 rb_addr, rptr_addr, wptr_gpu_addr;
4284
4285        /* Set the write pointer delay */
4286        WREG32(mmCP_RB_WPTR_DELAY, 0);
4287
4288        /* set the RB to use vmid 0 */
4289        WREG32(mmCP_RB_VMID, 0);
4290
4291        /* Set ring buffer size */
4292        ring = &adev->gfx.gfx_ring[0];
4293        rb_bufsz = order_base_2(ring->ring_size / 8);
4294        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4295        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4296        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4297        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4298#ifdef __BIG_ENDIAN
4299        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4300#endif
4301        WREG32(mmCP_RB0_CNTL, tmp);
4302
4303        /* Initialize the ring buffer's read and write pointers */
4304        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4305        ring->wptr = 0;
4306        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4307
4308        /* set the wb address wether it's enabled or not */
4309        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4310        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4311        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4312
4313        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4314        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4315        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4316        mdelay(1);
4317        WREG32(mmCP_RB0_CNTL, tmp);
4318
4319        rb_addr = ring->gpu_addr >> 8;
4320        WREG32(mmCP_RB0_BASE, rb_addr);
4321        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4322
4323        gfx_v8_0_set_cpg_door_bell(adev, ring);
4324        /* start the ring */
4325        amdgpu_ring_clear_ring(ring);
4326        gfx_v8_0_cp_gfx_start(adev);
4327        ring->sched.ready = true;
4328
4329        return 0;
4330}
4331
4332static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4333{
4334        if (enable) {
4335                WREG32(mmCP_MEC_CNTL, 0);
4336        } else {
4337                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4338                adev->gfx.kiq.ring.sched.ready = false;
4339        }
4340        udelay(50);
4341}
4342
4343/* KIQ functions */
4344static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4345{
4346        uint32_t tmp;
4347        struct amdgpu_device *adev = ring->adev;
4348
4349        /* tell RLC which is KIQ queue */
4350        tmp = RREG32(mmRLC_CP_SCHEDULERS);
4351        tmp &= 0xffffff00;
4352        tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4353        WREG32(mmRLC_CP_SCHEDULERS, tmp);
4354        tmp |= 0x80;
4355        WREG32(mmRLC_CP_SCHEDULERS, tmp);
4356}
4357
4358static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4359{
4360        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4361        uint64_t queue_mask = 0;
4362        int r, i;
4363
4364        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4365                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4366                        continue;
4367
4368                /* This situation may be hit in the future if a new HW
4369                 * generation exposes more than 64 queues. If so, the
4370                 * definition of queue_mask needs updating */
4371                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4372                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4373                        break;
4374                }
4375
4376                queue_mask |= (1ull << i);
4377        }
4378
4379        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4380        if (r) {
4381                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4382                return r;
4383        }
4384        /* set resources */
4385        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4386        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4387        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4388        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4389        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4390        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4391        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4392        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4393        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4394                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4395                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4396                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4397
4398                /* map queues */
4399                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4400                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4401                amdgpu_ring_write(kiq_ring,
4402                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4403                amdgpu_ring_write(kiq_ring,
4404                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4405                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4406                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4407                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4408                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4409                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4410                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4411                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4412        }
4413
4414        amdgpu_ring_commit(kiq_ring);
4415
4416        return 0;
4417}
4418
4419static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4420{
4421        int i, r = 0;
4422
4423        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4424                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4425                for (i = 0; i < adev->usec_timeout; i++) {
4426                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4427                                break;
4428                        udelay(1);
4429                }
4430                if (i == adev->usec_timeout)
4431                        r = -ETIMEDOUT;
4432        }
4433        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4434        WREG32(mmCP_HQD_PQ_RPTR, 0);
4435        WREG32(mmCP_HQD_PQ_WPTR, 0);
4436
4437        return r;
4438}
4439
4440static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4441{
4442        struct amdgpu_device *adev = ring->adev;
4443
4444        if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4445                if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4446                        mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4447                        mqd->cp_hqd_queue_priority =
4448                                AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4449                }
4450        }
4451}
4452
4453static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4454{
4455        struct amdgpu_device *adev = ring->adev;
4456        struct vi_mqd *mqd = ring->mqd_ptr;
4457        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4458        uint32_t tmp;
4459
4460        mqd->header = 0xC0310800;
4461        mqd->compute_pipelinestat_enable = 0x00000001;
4462        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4463        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4464        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4465        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4466        mqd->compute_misc_reserved = 0x00000003;
4467        mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4468                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4469        mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4470                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4471        eop_base_addr = ring->eop_gpu_addr >> 8;
4472        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4473        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4474
4475        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4476        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4477        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4478                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4479
4480        mqd->cp_hqd_eop_control = tmp;
4481
4482        /* enable doorbell? */
4483        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4484                            CP_HQD_PQ_DOORBELL_CONTROL,
4485                            DOORBELL_EN,
4486                            ring->use_doorbell ? 1 : 0);
4487
4488        mqd->cp_hqd_pq_doorbell_control = tmp;
4489
4490        /* set the pointer to the MQD */
4491        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4492        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4493
4494        /* set MQD vmid to 0 */
4495        tmp = RREG32(mmCP_MQD_CONTROL);
4496        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4497        mqd->cp_mqd_control = tmp;
4498
4499        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4500        hqd_gpu_addr = ring->gpu_addr >> 8;
4501        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4502        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4503
4504        /* set up the HQD, this is similar to CP_RB0_CNTL */
4505        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4506        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4507                            (order_base_2(ring->ring_size / 4) - 1));
4508        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4509                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4510#ifdef __BIG_ENDIAN
4511        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4512#endif
4513        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4514        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4515        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4516        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4517        mqd->cp_hqd_pq_control = tmp;
4518
4519        /* set the wb address whether it's enabled or not */
4520        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4521        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4522        mqd->cp_hqd_pq_rptr_report_addr_hi =
4523                upper_32_bits(wb_gpu_addr) & 0xffff;
4524
4525        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4526        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4527        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4528        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4529
4530        tmp = 0;
4531        /* enable the doorbell if requested */
4532        if (ring->use_doorbell) {
4533                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4534                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4535                                DOORBELL_OFFSET, ring->doorbell_index);
4536
4537                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4538                                         DOORBELL_EN, 1);
4539                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4540                                         DOORBELL_SOURCE, 0);
4541                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4542                                         DOORBELL_HIT, 0);
4543        }
4544
4545        mqd->cp_hqd_pq_doorbell_control = tmp;
4546
4547        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4548        ring->wptr = 0;
4549        mqd->cp_hqd_pq_wptr = ring->wptr;
4550        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4551
4552        /* set the vmid for the queue */
4553        mqd->cp_hqd_vmid = 0;
4554
4555        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4556        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4557        mqd->cp_hqd_persistent_state = tmp;
4558
4559        /* set MTYPE */
4560        tmp = RREG32(mmCP_HQD_IB_CONTROL);
4561        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4562        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4563        mqd->cp_hqd_ib_control = tmp;
4564
4565        tmp = RREG32(mmCP_HQD_IQ_TIMER);
4566        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4567        mqd->cp_hqd_iq_timer = tmp;
4568
4569        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4570        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4571        mqd->cp_hqd_ctx_save_control = tmp;
4572
4573        /* defaults */
4574        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4575        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4576        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4577        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4578        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4579        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4580        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4581        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4582        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4583        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4584        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4585        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4586
4587        /* set static priority for a queue/ring */
4588        gfx_v8_0_mqd_set_priority(ring, mqd);
4589        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4590
4591        /* map_queues packet doesn't need activate the queue,
4592         * so only kiq need set this field.
4593         */
4594        if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4595                mqd->cp_hqd_active = 1;
4596
4597        return 0;
4598}
4599
4600static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4601                        struct vi_mqd *mqd)
4602{
4603        uint32_t mqd_reg;
4604        uint32_t *mqd_data;
4605
4606        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4607        mqd_data = &mqd->cp_mqd_base_addr_lo;
4608
4609        /* disable wptr polling */
4610        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4611
4612        /* program all HQD registers */
4613        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4614                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4615
4616        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4617         * This is safe since EOP RPTR==WPTR for any inactive HQD
4618         * on ASICs that do not support context-save.
4619         * EOP writes/reads can start anywhere in the ring.
4620         */
4621        if (adev->asic_type != CHIP_TONGA) {
4622                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4623                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4624                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4625        }
4626
4627        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4628                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4629
4630        /* activate the HQD */
4631        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4632                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4633
4634        return 0;
4635}
4636
4637static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4638{
4639        struct amdgpu_device *adev = ring->adev;
4640        struct vi_mqd *mqd = ring->mqd_ptr;
4641        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4642
4643        gfx_v8_0_kiq_setting(ring);
4644
4645        if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4646                /* reset MQD to a clean status */
4647                if (adev->gfx.mec.mqd_backup[mqd_idx])
4648                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4649
4650                /* reset ring buffer */
4651                ring->wptr = 0;
4652                amdgpu_ring_clear_ring(ring);
4653                mutex_lock(&adev->srbm_mutex);
4654                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4655                gfx_v8_0_mqd_commit(adev, mqd);
4656                vi_srbm_select(adev, 0, 0, 0, 0);
4657                mutex_unlock(&adev->srbm_mutex);
4658        } else {
4659                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4660                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4661                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4662                mutex_lock(&adev->srbm_mutex);
4663                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4664                gfx_v8_0_mqd_init(ring);
4665                gfx_v8_0_mqd_commit(adev, mqd);
4666                vi_srbm_select(adev, 0, 0, 0, 0);
4667                mutex_unlock(&adev->srbm_mutex);
4668
4669                if (adev->gfx.mec.mqd_backup[mqd_idx])
4670                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4671        }
4672
4673        return 0;
4674}
4675
4676static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4677{
4678        struct amdgpu_device *adev = ring->adev;
4679        struct vi_mqd *mqd = ring->mqd_ptr;
4680        int mqd_idx = ring - &adev->gfx.compute_ring[0];
4681
4682        if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4683                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4684                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4685                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4686                mutex_lock(&adev->srbm_mutex);
4687                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4688                gfx_v8_0_mqd_init(ring);
4689                vi_srbm_select(adev, 0, 0, 0, 0);
4690                mutex_unlock(&adev->srbm_mutex);
4691
4692                if (adev->gfx.mec.mqd_backup[mqd_idx])
4693                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4694        } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4695                /* reset MQD to a clean status */
4696                if (adev->gfx.mec.mqd_backup[mqd_idx])
4697                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4698                /* reset ring buffer */
4699                ring->wptr = 0;
4700                amdgpu_ring_clear_ring(ring);
4701        } else {
4702                amdgpu_ring_clear_ring(ring);
4703        }
4704        return 0;
4705}
4706
4707static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4708{
4709        if (adev->asic_type > CHIP_TONGA) {
4710                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4711                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4712        }
4713        /* enable doorbells */
4714        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4715}
4716
4717static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4718{
4719        struct amdgpu_ring *ring;
4720        int r;
4721
4722        ring = &adev->gfx.kiq.ring;
4723
4724        r = amdgpu_bo_reserve(ring->mqd_obj, false);
4725        if (unlikely(r != 0))
4726                return r;
4727
4728        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4729        if (unlikely(r != 0))
4730                return r;
4731
4732        gfx_v8_0_kiq_init_queue(ring);
4733        amdgpu_bo_kunmap(ring->mqd_obj);
4734        ring->mqd_ptr = NULL;
4735        amdgpu_bo_unreserve(ring->mqd_obj);
4736        ring->sched.ready = true;
4737        return 0;
4738}
4739
4740static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4741{
4742        struct amdgpu_ring *ring = NULL;
4743        int r = 0, i;
4744
4745        gfx_v8_0_cp_compute_enable(adev, true);
4746
4747        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748                ring = &adev->gfx.compute_ring[i];
4749
4750                r = amdgpu_bo_reserve(ring->mqd_obj, false);
4751                if (unlikely(r != 0))
4752                        goto done;
4753                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4754                if (!r) {
4755                        r = gfx_v8_0_kcq_init_queue(ring);
4756                        amdgpu_bo_kunmap(ring->mqd_obj);
4757                        ring->mqd_ptr = NULL;
4758                }
4759                amdgpu_bo_unreserve(ring->mqd_obj);
4760                if (r)
4761                        goto done;
4762        }
4763
4764        gfx_v8_0_set_mec_doorbell_range(adev);
4765
4766        r = gfx_v8_0_kiq_kcq_enable(adev);
4767        if (r)
4768                goto done;
4769
4770done:
4771        return r;
4772}
4773
4774static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4775{
4776        int r, i;
4777        struct amdgpu_ring *ring;
4778
4779        /* collect all the ring_tests here, gfx, kiq, compute */
4780        ring = &adev->gfx.gfx_ring[0];
4781        r = amdgpu_ring_test_helper(ring);
4782        if (r)
4783                return r;
4784
4785        ring = &adev->gfx.kiq.ring;
4786        r = amdgpu_ring_test_helper(ring);
4787        if (r)
4788                return r;
4789
4790        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4791                ring = &adev->gfx.compute_ring[i];
4792                amdgpu_ring_test_helper(ring);
4793        }
4794
4795        return 0;
4796}
4797
4798static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4799{
4800        int r;
4801
4802        if (!(adev->flags & AMD_IS_APU))
4803                gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4804
4805        r = gfx_v8_0_kiq_resume(adev);
4806        if (r)
4807                return r;
4808
4809        r = gfx_v8_0_cp_gfx_resume(adev);
4810        if (r)
4811                return r;
4812
4813        r = gfx_v8_0_kcq_resume(adev);
4814        if (r)
4815                return r;
4816
4817        r = gfx_v8_0_cp_test_all_rings(adev);
4818        if (r)
4819                return r;
4820
4821        gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4822
4823        return 0;
4824}
4825
4826static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4827{
4828        gfx_v8_0_cp_gfx_enable(adev, enable);
4829        gfx_v8_0_cp_compute_enable(adev, enable);
4830}
4831
4832static int gfx_v8_0_hw_init(void *handle)
4833{
4834        int r;
4835        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4836
4837        gfx_v8_0_init_golden_registers(adev);
4838        gfx_v8_0_constants_init(adev);
4839
4840        r = adev->gfx.rlc.funcs->resume(adev);
4841        if (r)
4842                return r;
4843
4844        r = gfx_v8_0_cp_resume(adev);
4845
4846        return r;
4847}
4848
4849static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4850{
4851        int r, i;
4852        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4853
4854        r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4855        if (r)
4856                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4857
4858        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4859                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4860
4861                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4862                amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4863                                                PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4864                                                PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4865                                                PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4866                                                PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4867                amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4868                amdgpu_ring_write(kiq_ring, 0);
4869                amdgpu_ring_write(kiq_ring, 0);
4870                amdgpu_ring_write(kiq_ring, 0);
4871        }
4872        r = amdgpu_ring_test_helper(kiq_ring);
4873        if (r)
4874                DRM_ERROR("KCQ disable failed\n");
4875
4876        return r;
4877}
4878
4879static bool gfx_v8_0_is_idle(void *handle)
4880{
4881        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4882
4883        if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4884                || RREG32(mmGRBM_STATUS2) != 0x8)
4885                return false;
4886        else
4887                return true;
4888}
4889
4890static bool gfx_v8_0_rlc_is_idle(void *handle)
4891{
4892        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4893
4894        if (RREG32(mmGRBM_STATUS2) != 0x8)
4895                return false;
4896        else
4897                return true;
4898}
4899
4900static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4901{
4902        unsigned int i;
4903        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4904
4905        for (i = 0; i < adev->usec_timeout; i++) {
4906                if (gfx_v8_0_rlc_is_idle(handle))
4907                        return 0;
4908
4909                udelay(1);
4910        }
4911        return -ETIMEDOUT;
4912}
4913
4914static int gfx_v8_0_wait_for_idle(void *handle)
4915{
4916        unsigned int i;
4917        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4918
4919        for (i = 0; i < adev->usec_timeout; i++) {
4920                if (gfx_v8_0_is_idle(handle))
4921                        return 0;
4922
4923                udelay(1);
4924        }
4925        return -ETIMEDOUT;
4926}
4927
4928static int gfx_v8_0_hw_fini(void *handle)
4929{
4930        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4931
4932        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4933        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4934
4935        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4936
4937        amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4938
4939        /* disable KCQ to avoid CPC touch memory not valid anymore */
4940        gfx_v8_0_kcq_disable(adev);
4941
4942        if (amdgpu_sriov_vf(adev)) {
4943                pr_debug("For SRIOV client, shouldn't do anything.\n");
4944                return 0;
4945        }
4946        amdgpu_gfx_rlc_enter_safe_mode(adev);
4947        if (!gfx_v8_0_wait_for_idle(adev))
4948                gfx_v8_0_cp_enable(adev, false);
4949        else
4950                pr_err("cp is busy, skip halt cp\n");
4951        if (!gfx_v8_0_wait_for_rlc_idle(adev))
4952                adev->gfx.rlc.funcs->stop(adev);
4953        else
4954                pr_err("rlc is busy, skip halt rlc\n");
4955        amdgpu_gfx_rlc_exit_safe_mode(adev);
4956
4957        return 0;
4958}
4959
/* Suspend hook: identical to hardware teardown, result passed through. */
static int gfx_v8_0_suspend(void *handle)
{
	int ret = gfx_v8_0_hw_fini(handle);

	return ret;
}
4964
/* Resume hook: identical to hardware init, result passed through. */
static int gfx_v8_0_resume(void *handle)
{
	int ret = gfx_v8_0_hw_init(handle);

	return ret;
}
4969
4970static bool gfx_v8_0_check_soft_reset(void *handle)
4971{
4972        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4973        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4974        u32 tmp;
4975
4976        /* GRBM_STATUS */
4977        tmp = RREG32(mmGRBM_STATUS);
4978        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4979                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4980                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4981                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4982                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4983                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4984                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4985                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4986                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4987                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4988                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4989                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4990                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4991        }
4992
4993        /* GRBM_STATUS2 */
4994        tmp = RREG32(mmGRBM_STATUS2);
4995        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4996                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4997                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4998
4999        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5000            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5001            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5002                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5003                                                SOFT_RESET_CPF, 1);
5004                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5005                                                SOFT_RESET_CPC, 1);
5006                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5007                                                SOFT_RESET_CPG, 1);
5008                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5009                                                SOFT_RESET_GRBM, 1);
5010        }
5011
5012        /* SRBM_STATUS */
5013        tmp = RREG32(mmSRBM_STATUS);
5014        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5015                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5016                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5017        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5018                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5019                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5020
5021        if (grbm_soft_reset || srbm_soft_reset) {
5022                adev->gfx.grbm_soft_reset = grbm_soft_reset;
5023                adev->gfx.srbm_soft_reset = srbm_soft_reset;
5024                return true;
5025        } else {
5026                adev->gfx.grbm_soft_reset = 0;
5027                adev->gfx.srbm_soft_reset = 0;
5028                return false;
5029        }
5030}
5031
5032static int gfx_v8_0_pre_soft_reset(void *handle)
5033{
5034        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5035        u32 grbm_soft_reset = 0;
5036
5037        if ((!adev->gfx.grbm_soft_reset) &&
5038            (!adev->gfx.srbm_soft_reset))
5039                return 0;
5040
5041        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5042
5043        /* stop the rlc */
5044        adev->gfx.rlc.funcs->stop(adev);
5045
5046        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5047            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5048                /* Disable GFX parsing/prefetching */
5049                gfx_v8_0_cp_gfx_enable(adev, false);
5050
5051        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5052            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5053            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5054            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5055                int i;
5056
5057                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5058                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5059
5060                        mutex_lock(&adev->srbm_mutex);
5061                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5062                        gfx_v8_0_deactivate_hqd(adev, 2);
5063                        vi_srbm_select(adev, 0, 0, 0, 0);
5064                        mutex_unlock(&adev->srbm_mutex);
5065                }
5066                /* Disable MEC parsing/prefetching */
5067                gfx_v8_0_cp_compute_enable(adev, false);
5068        }
5069
5070        return 0;
5071}
5072
5073static int gfx_v8_0_soft_reset(void *handle)
5074{
5075        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5076        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5077        u32 tmp;
5078
5079        if ((!adev->gfx.grbm_soft_reset) &&
5080            (!adev->gfx.srbm_soft_reset))
5081                return 0;
5082
5083        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5084        srbm_soft_reset = adev->gfx.srbm_soft_reset;
5085
5086        if (grbm_soft_reset || srbm_soft_reset) {
5087                tmp = RREG32(mmGMCON_DEBUG);
5088                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5089                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5090                WREG32(mmGMCON_DEBUG, tmp);
5091                udelay(50);
5092        }
5093
5094        if (grbm_soft_reset) {
5095                tmp = RREG32(mmGRBM_SOFT_RESET);
5096                tmp |= grbm_soft_reset;
5097                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5098                WREG32(mmGRBM_SOFT_RESET, tmp);
5099                tmp = RREG32(mmGRBM_SOFT_RESET);
5100
5101                udelay(50);
5102
5103                tmp &= ~grbm_soft_reset;
5104                WREG32(mmGRBM_SOFT_RESET, tmp);
5105                tmp = RREG32(mmGRBM_SOFT_RESET);
5106        }
5107
5108        if (srbm_soft_reset) {
5109                tmp = RREG32(mmSRBM_SOFT_RESET);
5110                tmp |= srbm_soft_reset;
5111                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5112                WREG32(mmSRBM_SOFT_RESET, tmp);
5113                tmp = RREG32(mmSRBM_SOFT_RESET);
5114
5115                udelay(50);
5116
5117                tmp &= ~srbm_soft_reset;
5118                WREG32(mmSRBM_SOFT_RESET, tmp);
5119                tmp = RREG32(mmSRBM_SOFT_RESET);
5120        }
5121
5122        if (grbm_soft_reset || srbm_soft_reset) {
5123                tmp = RREG32(mmGMCON_DEBUG);
5124                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5125                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5126                WREG32(mmGMCON_DEBUG, tmp);
5127        }
5128
5129        /* Wait a little for things to settle down */
5130        udelay(50);
5131
5132        return 0;
5133}
5134
5135static int gfx_v8_0_post_soft_reset(void *handle)
5136{
5137        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5138        u32 grbm_soft_reset = 0;
5139
5140        if ((!adev->gfx.grbm_soft_reset) &&
5141            (!adev->gfx.srbm_soft_reset))
5142                return 0;
5143
5144        grbm_soft_reset = adev->gfx.grbm_soft_reset;
5145
5146        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5147            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5148            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5149            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5150                int i;
5151
5152                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5153                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5154
5155                        mutex_lock(&adev->srbm_mutex);
5156                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5157                        gfx_v8_0_deactivate_hqd(adev, 2);
5158                        vi_srbm_select(adev, 0, 0, 0, 0);
5159                        mutex_unlock(&adev->srbm_mutex);
5160                }
5161                gfx_v8_0_kiq_resume(adev);
5162                gfx_v8_0_kcq_resume(adev);
5163        }
5164
5165        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5166            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5167                gfx_v8_0_cp_gfx_resume(adev);
5168
5169        gfx_v8_0_cp_test_all_rings(adev);
5170
5171        adev->gfx.rlc.funcs->start(adev);
5172
5173        return 0;
5174}
5175
5176/**
5177 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5178 *
5179 * @adev: amdgpu_device pointer
5180 *
5181 * Fetches a GPU clock counter snapshot.
5182 * Returns the 64 bit clock counter snapshot.
5183 */
5184static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5185{
5186        uint64_t clock;
5187
5188        mutex_lock(&adev->gfx.gpu_clock_mutex);
5189        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5190        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5191                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5192        mutex_unlock(&adev->gfx.gpu_clock_mutex);
5193        return clock;
5194}
5195
5196static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5197                                          uint32_t vmid,
5198                                          uint32_t gds_base, uint32_t gds_size,
5199                                          uint32_t gws_base, uint32_t gws_size,
5200                                          uint32_t oa_base, uint32_t oa_size)
5201{
5202        /* GDS Base */
5203        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5204        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5205                                WRITE_DATA_DST_SEL(0)));
5206        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5207        amdgpu_ring_write(ring, 0);
5208        amdgpu_ring_write(ring, gds_base);
5209
5210        /* GDS Size */
5211        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5212        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5213                                WRITE_DATA_DST_SEL(0)));
5214        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5215        amdgpu_ring_write(ring, 0);
5216        amdgpu_ring_write(ring, gds_size);
5217
5218        /* GWS */
5219        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5220        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5221                                WRITE_DATA_DST_SEL(0)));
5222        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5223        amdgpu_ring_write(ring, 0);
5224        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5225
5226        /* OA */
5227        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5228        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5229                                WRITE_DATA_DST_SEL(0)));
5230        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5231        amdgpu_ring_write(ring, 0);
5232        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5233}
5234
5235static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5236{
5237        WREG32(mmSQ_IND_INDEX,
5238                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5239                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5240                (address << SQ_IND_INDEX__INDEX__SHIFT) |
5241                (SQ_IND_INDEX__FORCE_READ_MASK));
5242        return RREG32(mmSQ_IND_DATA);
5243}
5244
5245static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5246                           uint32_t wave, uint32_t thread,
5247                           uint32_t regno, uint32_t num, uint32_t *out)
5248{
5249        WREG32(mmSQ_IND_INDEX,
5250                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5251                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5252                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5253                (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5254                (SQ_IND_INDEX__FORCE_READ_MASK) |
5255                (SQ_IND_INDEX__AUTO_INCR_MASK));
5256        while (num--)
5257                *(out++) = RREG32(mmSQ_IND_DATA);
5258}
5259
5260static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5261{
5262        /* type 0 wave data */
5263        dst[(*no_fields)++] = 0;
5264        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5265        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5266        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5267        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5268        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5269        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5270        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5271        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5272        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5273        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5274        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5275        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5276        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5277        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5278        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5279        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5280        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5281        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5282}
5283
5284static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5285                                     uint32_t wave, uint32_t start,
5286                                     uint32_t size, uint32_t *dst)
5287{
5288        wave_read_regs(
5289                adev, simd, wave, 0,
5290                start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5291}
5292
5293
5294static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5295        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5296        .select_se_sh = &gfx_v8_0_select_se_sh,
5297        .read_wave_data = &gfx_v8_0_read_wave_data,
5298        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5299        .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5300};
5301
5302static int gfx_v8_0_early_init(void *handle)
5303{
5304        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5305
5306        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5307        adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5308                                          AMDGPU_MAX_COMPUTE_RINGS);
5309        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5310        gfx_v8_0_set_ring_funcs(adev);
5311        gfx_v8_0_set_irq_funcs(adev);
5312        gfx_v8_0_set_gds_init(adev);
5313        gfx_v8_0_set_rlc_funcs(adev);
5314
5315        return 0;
5316}
5317
5318static int gfx_v8_0_late_init(void *handle)
5319{
5320        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5321        int r;
5322
5323        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5324        if (r)
5325                return r;
5326
5327        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5328        if (r)
5329                return r;
5330
5331        /* requires IBs so do in late init after IB pool is initialized */
5332        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5333        if (r)
5334                return r;
5335
5336        r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5337        if (r) {
5338                DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5339                return r;
5340        }
5341
5342        r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5343        if (r) {
5344                DRM_ERROR(
5345                        "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5346                        r);
5347                return r;
5348        }
5349
5350        return 0;
5351}
5352
5353static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5354                                                       bool enable)
5355{
5356        if ((adev->asic_type == CHIP_POLARIS11) ||
5357            (adev->asic_type == CHIP_POLARIS12) ||
5358            (adev->asic_type == CHIP_VEGAM))
5359                /* Send msg to SMU via Powerplay */
5360                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5361
5362        WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5363}
5364
5365static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5366                                                        bool enable)
5367{
5368        WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5369}
5370
5371static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5372                bool enable)
5373{
5374        WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5375}
5376
5377static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5378                                          bool enable)
5379{
5380        WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5381}
5382
5383static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5384                                                bool enable)
5385{
5386        WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5387
5388        /* Read any GFX register to wake up GFX. */
5389        if (!enable)
5390                RREG32(mmDB_RENDER_CONTROL);
5391}
5392
5393static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5394                                          bool enable)
5395{
5396        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5397                cz_enable_gfx_cg_power_gating(adev, true);
5398                if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5399                        cz_enable_gfx_pipeline_power_gating(adev, true);
5400        } else {
5401                cz_enable_gfx_cg_power_gating(adev, false);
5402                cz_enable_gfx_pipeline_power_gating(adev, false);
5403        }
5404}
5405
5406static int gfx_v8_0_set_powergating_state(void *handle,
5407                                          enum amd_powergating_state state)
5408{
5409        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5410        bool enable = (state == AMD_PG_STATE_GATE);
5411
5412        if (amdgpu_sriov_vf(adev))
5413                return 0;
5414
5415        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5416                                AMD_PG_SUPPORT_RLC_SMU_HS |
5417                                AMD_PG_SUPPORT_CP |
5418                                AMD_PG_SUPPORT_GFX_DMG))
5419                amdgpu_gfx_rlc_enter_safe_mode(adev);
5420        switch (adev->asic_type) {
5421        case CHIP_CARRIZO:
5422        case CHIP_STONEY:
5423
5424                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5425                        cz_enable_sck_slow_down_on_power_up(adev, true);
5426                        cz_enable_sck_slow_down_on_power_down(adev, true);
5427                } else {
5428                        cz_enable_sck_slow_down_on_power_up(adev, false);
5429                        cz_enable_sck_slow_down_on_power_down(adev, false);
5430                }
5431                if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5432                        cz_enable_cp_power_gating(adev, true);
5433                else
5434                        cz_enable_cp_power_gating(adev, false);
5435
5436                cz_update_gfx_cg_power_gating(adev, enable);
5437
5438                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5439                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5440                else
5441                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5442
5443                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5444                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5445                else
5446                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5447                break;
5448        case CHIP_POLARIS11:
5449        case CHIP_POLARIS12:
5450        case CHIP_VEGAM:
5451                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5452                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5453                else
5454                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5455
5456                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5457                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5458                else
5459                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5460
5461                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5462                        polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5463                else
5464                        polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5465                break;
5466        default:
5467                break;
5468        }
5469        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5470                                AMD_PG_SUPPORT_RLC_SMU_HS |
5471                                AMD_PG_SUPPORT_CP |
5472                                AMD_PG_SUPPORT_GFX_DMG))
5473                amdgpu_gfx_rlc_exit_safe_mode(adev);
5474        return 0;
5475}
5476
5477static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5478{
5479        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5480        int data;
5481
5482        if (amdgpu_sriov_vf(adev))
5483                *flags = 0;
5484
5485        /* AMD_CG_SUPPORT_GFX_MGCG */
5486        data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5487        if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5488                *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5489
5490        /* AMD_CG_SUPPORT_GFX_CGLG */
5491        data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5492        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5493                *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5494
5495        /* AMD_CG_SUPPORT_GFX_CGLS */
5496        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5497                *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5498
5499        /* AMD_CG_SUPPORT_GFX_CGTS */
5500        data = RREG32(mmCGTS_SM_CTRL_REG);
5501        if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5502                *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5503
5504        /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5505        if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5506                *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5507
5508        /* AMD_CG_SUPPORT_GFX_RLC_LS */
5509        data = RREG32(mmRLC_MEM_SLP_CNTL);
5510        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5511                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5512
5513        /* AMD_CG_SUPPORT_GFX_CP_LS */
5514        data = RREG32(mmCP_MEM_SLP_CNTL);
5515        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5516                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5517}
5518
5519static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5520                                     uint32_t reg_addr, uint32_t cmd)
5521{
5522        uint32_t data;
5523
5524        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5525
5526        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5527        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5528
5529        data = RREG32(mmRLC_SERDES_WR_CTRL);
5530        if (adev->asic_type == CHIP_STONEY)
5531                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5532                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5533                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5534                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5535                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5536                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5537                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5538                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5539                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5540        else
5541                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5542                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5543                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5544                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5545                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5546                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5547                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5548                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5549                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5550                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5551                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5552        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5553                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5554                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5555                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5556
5557        WREG32(mmRLC_SERDES_WR_CTRL, data);
5558}
5559
/* RLC safe-mode message codes */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 field layout: REQ is bit 0, MESSAGE is bits 4:1 */
#define RLC_GPR_REG2__REQ_MASK		0x00000001
#define RLC_GPR_REG2__REQ__SHIFT	0
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK	0x0000001e
5566
5567static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5568{
5569        uint32_t rlc_setting;
5570
5571        rlc_setting = RREG32(mmRLC_CNTL);
5572        if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5573                return false;
5574
5575        return true;
5576}
5577
5578static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5579{
5580        uint32_t data;
5581        unsigned i;
5582        data = RREG32(mmRLC_CNTL);
5583        data |= RLC_SAFE_MODE__CMD_MASK;
5584        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5585        data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5586        WREG32(mmRLC_SAFE_MODE, data);
5587
5588        /* wait for RLC_SAFE_MODE */
5589        for (i = 0; i < adev->usec_timeout; i++) {
5590                if ((RREG32(mmRLC_GPM_STAT) &
5591                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5592                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5593                    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5594                     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5595                        break;
5596                udelay(1);
5597        }
5598        for (i = 0; i < adev->usec_timeout; i++) {
5599                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5600                        break;
5601                udelay(1);
5602        }
5603}
5604
5605static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5606{
5607        uint32_t data;
5608        unsigned i;
5609
5610        data = RREG32(mmRLC_CNTL);
5611        data |= RLC_SAFE_MODE__CMD_MASK;
5612        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5613        WREG32(mmRLC_SAFE_MODE, data);
5614
5615        for (i = 0; i < adev->usec_timeout; i++) {
5616                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5617                        break;
5618                udelay(1);
5619        }
5620}
5621
5622static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5623{
5624        u32 data;
5625
5626        if (amdgpu_sriov_is_pp_one_vf(adev))
5627                data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5628        else
5629                data = RREG32(mmRLC_SPM_VMID);
5630
5631        data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5632        data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5633
5634        if (amdgpu_sriov_is_pp_one_vf(adev))
5635                WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5636        else
5637                WREG32(mmRLC_SPM_VMID, data);
5638}
5639
5640static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5641        .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5642        .set_safe_mode = gfx_v8_0_set_safe_mode,
5643        .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5644        .init = gfx_v8_0_rlc_init,
5645        .get_csb_size = gfx_v8_0_get_csb_size,
5646        .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5647        .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5648        .resume = gfx_v8_0_rlc_resume,
5649        .stop = gfx_v8_0_rlc_stop,
5650        .reset = gfx_v8_0_rlc_reset,
5651        .start = gfx_v8_0_rlc_start,
5652        .update_spm_vmid = gfx_v8_0_update_spm_vmid
5653};
5654
/*
 * Switch GFX medium grain clock gating (MGCG), together with the RLC/CP
 * memory light sleep and CGTS controls handled here, on or off.
 *
 * @adev:   device whose registers are programmed
 * @enable: request gating; the enable sequence only runs when
 *          AMD_CG_SUPPORT_GFX_MGCG is also set in adev->cg_flags,
 *          otherwise the disable sequence runs
 *
 * The whole register sequence is bracketed by
 * amdgpu_gfx_rlc_enter_safe_mode() and amdgpu_gfx_rlc_exit_safe_mode().
 * Registers are only written back when the computed value differs from
 * the value read.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        amdgpu_gfx_rlc_enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                /* light sleep enables are only touched when MGLS is supported */
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
                                /* 1 - RLC memory Light sleep */
                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                /* on APUs the GRBM override bit is left as it was read */
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* the LS override is only dropped when both MGLS and CGTS_LS are supported */
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                /* all four override bits are set here, APU or not */
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5758
/*
 * Switch GFX coarse grain clock gating (CGCG) and coarse grain light
 * sleep (CGLS) on or off.
 *
 * @adev:   device whose registers are programmed
 * @enable: request gating; the enable sequence only runs when
 *          AMD_CG_SUPPORT_GFX_CGCG is also set in adev->cg_flags,
 *          otherwise the disable sequence runs
 *
 * RLC_CGCG_CGLS_CTRL is sampled before RLC safe mode is entered; the
 * remaining register accesses happen between
 * amdgpu_gfx_rlc_enter_safe_mode() and amdgpu_gfx_rlc_exit_safe_mode().
 * Registers are only written back when the computed value differs from
 * the value read.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        amdgpu_gfx_rlc_enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* drop the CGCG bit from RLC_CGTT_MGCG_OVERRIDE */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 2 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /*
                 * 3 - write cmd to set CGLS
                 * NOTE(review): sent regardless of AMD_CG_SUPPORT_GFX_CGLS,
                 * which is only checked further down - confirm intended.
                 */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 4 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls*/
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);

                /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* TEST CGCG */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Set CGCG Override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
                /* enable interrupts again for PG */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        }

        /* final serdes idle wait, common to both paths, before leaving safe mode */
        gfx_v8_0_wait_for_rlc_serdes(adev);

        amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5851static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5852                                            bool enable)
5853{
5854        if (enable) {
5855                /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5856                 * ===  MGCG + MGLS + TS(CG/LS) ===
5857                 */
5858                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5859                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5860        } else {
5861                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5862                 * ===  CGCG + CGLS ===
5863                 */
5864                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5865                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5866        }
5867        return 0;
5868}
5869
/*
 * Build clock-gating messages for the GFX CG and GFX MG blocks from
 * adev->cg_flags and hand each one to amdgpu_dpm_set_clockgating_by_smu().
 *
 * @adev:  device
 * @state: AMD_CG_STATE_UNGATE forces pp_state to 0 in every message; any
 *         other value keeps the state bits derived from cg_flags
 *
 * Always returns 0; the result of amdgpu_dpm_set_clockgating_by_smu() is
 * not checked.
 *
 * NOTE(review): pp_support_state and pp_state are only reassigned with
 * '=' inside the *_LS branches.  If the MG section has its CG flag but not
 * its LS flag, the '|=' in the CG branch adds to whatever the CG section
 * left behind.  Confirm whether that carry-over is intended.
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
                                          enum amd_clockgating_state state)
{
        uint32_t msg_id, pp_state = 0;
        uint32_t pp_support_state = 0;

        /* coarse grain: PP_BLOCK_GFX_CG */
        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        pp_support_state = PP_STATE_SUPPORT_LS;
                        pp_state = PP_STATE_LS;
                }
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
                        pp_support_state |= PP_STATE_SUPPORT_CG;
                        pp_state |= PP_STATE_CG;
                }
                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_CG,
                                pp_support_state,
                                pp_state);
                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* medium grain: PP_BLOCK_GFX_MG */
        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        pp_support_state = PP_STATE_SUPPORT_LS;
                        pp_state = PP_STATE_LS;
                }

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
                        pp_support_state |= PP_STATE_SUPPORT_CG;
                        pp_state |= PP_STATE_CG;
                }

                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_MG,
                                pp_support_state,
                                pp_state);
                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        return 0;
}
5918
/*
 * Build clock-gating messages for the GFX CG, 3D, MG, RLC and CP blocks
 * from adev->cg_flags and hand each one to
 * amdgpu_dpm_set_clockgating_by_smu().
 *
 * @adev:  device
 * @state: AMD_CG_STATE_UNGATE forces pp_state to 0 in every message; any
 *         other value keeps the state bits derived from cg_flags
 *
 * Always returns 0; the result of amdgpu_dpm_set_clockgating_by_smu() is
 * not checked.
 *
 * NOTE(review): in the CG, 3D and MG sections pp_support_state and
 * pp_state are only reassigned with '=' inside the *_LS branches.  If a
 * section has its CG flag but not its LS flag, the '|=' in the CG branch
 * adds to whatever the previous section left behind.  Confirm whether
 * that carry-over is intended.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
                                          enum amd_clockgating_state state)
{

        uint32_t msg_id, pp_state = 0;
        uint32_t pp_support_state = 0;

        /* coarse grain: PP_BLOCK_GFX_CG */
        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        pp_support_state = PP_STATE_SUPPORT_LS;
                        pp_state = PP_STATE_LS;
                }
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
                        pp_support_state |= PP_STATE_SUPPORT_CG;
                        pp_state |= PP_STATE_CG;
                }
                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_CG,
                                pp_support_state,
                                pp_state);
                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* 3D coarse grain: PP_BLOCK_GFX_3D */
        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
                        pp_support_state = PP_STATE_SUPPORT_LS;
                        pp_state = PP_STATE_LS;
                }
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
                        pp_support_state |= PP_STATE_SUPPORT_CG;
                        pp_state |= PP_STATE_CG;
                }
                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_3D,
                                pp_support_state,
                                pp_state);
                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* medium grain: PP_BLOCK_GFX_MG */
        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        pp_support_state = PP_STATE_SUPPORT_LS;
                        pp_state = PP_STATE_LS;
                }

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
                        pp_support_state |= PP_STATE_SUPPORT_CG;
                        pp_state |= PP_STATE_CG;
                }

                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_MG,
                                pp_support_state,
                                pp_state);
                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* RLC light sleep: PP_BLOCK_GFX_RLC (both variables fully reassigned) */
        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
                pp_support_state = PP_STATE_SUPPORT_LS;

                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;
                else
                        pp_state = PP_STATE_LS;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_RLC,
                                pp_support_state,
                                pp_state);
                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* CP light sleep: PP_BLOCK_GFX_CP (both variables fully reassigned) */
        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
                pp_support_state = PP_STATE_SUPPORT_LS;

                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;
                else
                        pp_state = PP_STATE_LS;
                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                        PP_BLOCK_GFX_CP,
                        pp_support_state,
                        pp_state);
                amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        return 0;
}
6016
6017static int gfx_v8_0_set_clockgating_state(void *handle,
6018                                          enum amd_clockgating_state state)
6019{
6020        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6021
6022        if (amdgpu_sriov_vf(adev))
6023                return 0;
6024
6025        switch (adev->asic_type) {
6026        case CHIP_FIJI:
6027        case CHIP_CARRIZO:
6028        case CHIP_STONEY:
6029                gfx_v8_0_update_gfx_clock_gating(adev,
6030                                                 state == AMD_CG_STATE_GATE);
6031                break;
6032        case CHIP_TONGA:
6033                gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6034                break;
6035        case CHIP_POLARIS10:
6036        case CHIP_POLARIS11:
6037        case CHIP_POLARIS12:
6038        case CHIP_VEGAM:
6039                gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6040                break;
6041        default:
6042                break;
6043        }
6044        return 0;
6045}
6046
6047static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6048{
6049        return ring->adev->wb.wb[ring->rptr_offs];
6050}
6051
6052static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6053{
6054        struct amdgpu_device *adev = ring->adev;
6055
6056        if (ring->use_doorbell)
6057                /* XXX check if swapping is necessary on BE */
6058                return ring->adev->wb.wb[ring->wptr_offs];
6059        else
6060                return RREG32(mmCP_RB0_WPTR);
6061}
6062
6063static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6064{
6065        struct amdgpu_device *adev = ring->adev;
6066
6067        if (ring->use_doorbell) {
6068                /* XXX check if swapping is necessary on BE */
6069                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6070                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6071        } else {
6072                WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6073                (void)RREG32(mmCP_RB0_WPTR);
6074        }
6075}
6076
6077static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6078{
6079        u32 ref_and_mask, reg_mem_engine;
6080
6081        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6082            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6083                switch (ring->me) {
6084                case 1:
6085                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6086                        break;
6087                case 2:
6088                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6089                        break;
6090                default:
6091                        return;
6092                }
6093                reg_mem_engine = 0;
6094        } else {
6095                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6096                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6097        }
6098
6099        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6100        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6101                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6102                                 reg_mem_engine));
6103        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6104        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6105        amdgpu_ring_write(ring, ref_and_mask);
6106        amdgpu_ring_write(ring, ref_and_mask);
6107        amdgpu_ring_write(ring, 0x20); /* poll interval */
6108}
6109
6110static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6111{
6112        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6113        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6114                EVENT_INDEX(4));
6115
6116        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6117        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6118                EVENT_INDEX(0));
6119}
6120
6121static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6122                                        struct amdgpu_job *job,
6123                                        struct amdgpu_ib *ib,
6124                                        uint32_t flags)
6125{
6126        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6127        u32 header, control = 0;
6128
6129        if (ib->flags & AMDGPU_IB_FLAG_CE)
6130                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6131        else
6132                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6133
6134        control |= ib->length_dw | (vmid << 24);
6135
6136        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6137                control |= INDIRECT_BUFFER_PRE_ENB(1);
6138
6139                if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6140                        gfx_v8_0_ring_emit_de_meta(ring);
6141        }
6142
6143        amdgpu_ring_write(ring, header);
6144        amdgpu_ring_write(ring,
6145#ifdef __BIG_ENDIAN
6146                          (2 << 0) |
6147#endif
6148                          (ib->gpu_addr & 0xFFFFFFFC));
6149        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6150        amdgpu_ring_write(ring, control);
6151}
6152
6153static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6154                                          struct amdgpu_job *job,
6155                                          struct amdgpu_ib *ib,
6156                                          uint32_t flags)
6157{
6158        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6159        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6160
6161        /* Currently, there is a high possibility to get wave ID mismatch
6162         * between ME and GDS, leading to a hw deadlock, because ME generates
6163         * different wave IDs than the GDS expects. This situation happens
6164         * randomly when at least 5 compute pipes use GDS ordered append.
6165         * The wave IDs generated by ME are also wrong after suspend/resume.
6166         * Those are probably bugs somewhere else in the kernel driver.
6167         *
6168         * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6169         * GDS to 0 for this ring (me/pipe).
6170         */
6171        if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6172                amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6173                amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6174                amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6175        }
6176
6177        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6178        amdgpu_ring_write(ring,
6179#ifdef __BIG_ENDIAN
6180                                (2 << 0) |
6181#endif
6182                                (ib->gpu_addr & 0xFFFFFFFC));
6183        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6184        amdgpu_ring_write(ring, control);
6185}
6186
6187static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6188                                         u64 seq, unsigned flags)
6189{
6190        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6191        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6192
6193        /* Workaround for cache flush problems. First send a dummy EOP
6194         * event down the pipe with seq one below.
6195         */
6196        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6197        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6198                                 EOP_TC_ACTION_EN |
6199                                 EOP_TC_WB_ACTION_EN |
6200                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6201                                 EVENT_INDEX(5)));
6202        amdgpu_ring_write(ring, addr & 0xfffffffc);
6203        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6204                                DATA_SEL(1) | INT_SEL(0));
6205        amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6206        amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6207
6208        /* Then send the real EOP event down the pipe:
6209         * EVENT_WRITE_EOP - flush caches, send int */
6210        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6211        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6212                                 EOP_TC_ACTION_EN |
6213                                 EOP_TC_WB_ACTION_EN |
6214                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6215                                 EVENT_INDEX(5)));
6216        amdgpu_ring_write(ring, addr & 0xfffffffc);
6217        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6218                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6219        amdgpu_ring_write(ring, lower_32_bits(seq));
6220        amdgpu_ring_write(ring, upper_32_bits(seq));
6221
6222}
6223
6224static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6225{
6226        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6227        uint32_t seq = ring->fence_drv.sync_seq;
6228        uint64_t addr = ring->fence_drv.gpu_addr;
6229
6230        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6231        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6232                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6233                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6234        amdgpu_ring_write(ring, addr & 0xfffffffc);
6235        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6236        amdgpu_ring_write(ring, seq);
6237        amdgpu_ring_write(ring, 0xffffffff);
6238        amdgpu_ring_write(ring, 4); /* poll interval */
6239}
6240
6241static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6242                                        unsigned vmid, uint64_t pd_addr)
6243{
6244        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6245
6246        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6247
6248        /* wait for the invalidate to complete */
6249        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6250        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6251                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6252                                 WAIT_REG_MEM_ENGINE(0))); /* me */
6253        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6254        amdgpu_ring_write(ring, 0);
6255        amdgpu_ring_write(ring, 0); /* ref */
6256        amdgpu_ring_write(ring, 0); /* mask */
6257        amdgpu_ring_write(ring, 0x20); /* poll interval */
6258
6259        /* compute doesn't have PFP */
6260        if (usepfp) {
6261                /* sync PFP to ME, otherwise we might get invalid PFP reads */
6262                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6263                amdgpu_ring_write(ring, 0x0);
6264        }
6265}
6266
6267static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6268{
6269        return ring->adev->wb.wb[ring->wptr_offs];
6270}
6271
6272static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6273{
6274        struct amdgpu_device *adev = ring->adev;
6275
6276        /* XXX check if swapping is necessary on BE */
6277        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6278        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6279}
6280
6281static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6282                                             u64 addr, u64 seq,
6283                                             unsigned flags)
6284{
6285        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6286        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6287
6288        /* RELEASE_MEM - flush caches, send int */
6289        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6290        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6291                                 EOP_TC_ACTION_EN |
6292                                 EOP_TC_WB_ACTION_EN |
6293                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6294                                 EVENT_INDEX(5)));
6295        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6296        amdgpu_ring_write(ring, addr & 0xfffffffc);
6297        amdgpu_ring_write(ring, upper_32_bits(addr));
6298        amdgpu_ring_write(ring, lower_32_bits(seq));
6299        amdgpu_ring_write(ring, upper_32_bits(seq));
6300}
6301
6302static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6303                                         u64 seq, unsigned int flags)
6304{
6305        /* we only allocate 32bit for each seq wb address */
6306        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6307
6308        /* write fence seq to the "addr" */
6309        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6310        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6311                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6312        amdgpu_ring_write(ring, lower_32_bits(addr));
6313        amdgpu_ring_write(ring, upper_32_bits(addr));
6314        amdgpu_ring_write(ring, lower_32_bits(seq));
6315
6316        if (flags & AMDGPU_FENCE_FLAG_INT) {
6317                /* set register to trigger INT */
6318                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6319                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6320                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6321                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6322                amdgpu_ring_write(ring, 0);
6323                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6324        }
6325}
6326
6327static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6328{
6329        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6330        amdgpu_ring_write(ring, 0);
6331}
6332
6333static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6334{
6335        uint32_t dw2 = 0;
6336
6337        if (amdgpu_sriov_vf(ring->adev))
6338                gfx_v8_0_ring_emit_ce_meta(ring);
6339
6340        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6341        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6342                gfx_v8_0_ring_emit_vgt_flush(ring);
6343                /* set load_global_config & load_global_uconfig */
6344                dw2 |= 0x8001;
6345                /* set load_cs_sh_regs */
6346                dw2 |= 0x01000000;
6347                /* set load_per_context_state & load_gfx_sh_regs for GFX */
6348                dw2 |= 0x10002;
6349
6350                /* set load_ce_ram if preamble presented */
6351                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6352                        dw2 |= 0x10000000;
6353        } else {
6354                /* still load_ce_ram if this is the first time preamble presented
6355                 * although there is no context switch happens.
6356                 */
6357                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6358                        dw2 |= 0x10000000;
6359        }
6360
6361        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6362        amdgpu_ring_write(ring, dw2);
6363        amdgpu_ring_write(ring, 0);
6364}
6365
6366static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6367{
6368        unsigned ret;
6369
6370        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6371        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6372        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6373        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6374        ret = ring->wptr & ring->buf_mask;
6375        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6376        return ret;
6377}
6378
6379static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6380{
6381        unsigned cur;
6382
6383        BUG_ON(offset > ring->buf_mask);
6384        BUG_ON(ring->ring[offset] != 0x55aa55aa);
6385
6386        cur = (ring->wptr & ring->buf_mask) - 1;
6387        if (likely(cur > offset))
6388                ring->ring[offset] = cur - offset;
6389        else
6390                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6391}
6392
6393static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6394                                    uint32_t reg_val_offs)
6395{
6396        struct amdgpu_device *adev = ring->adev;
6397
6398        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6399        amdgpu_ring_write(ring, 0 |     /* src: register*/
6400                                (5 << 8) |      /* dst: memory */
6401                                (1 << 20));     /* write confirm */
6402        amdgpu_ring_write(ring, reg);
6403        amdgpu_ring_write(ring, 0);
6404        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6405                                reg_val_offs * 4));
6406        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6407                                reg_val_offs * 4));
6408}
6409
6410static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6411                                  uint32_t val)
6412{
6413        uint32_t cmd;
6414
6415        switch (ring->funcs->type) {
6416        case AMDGPU_RING_TYPE_GFX:
6417                cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6418                break;
6419        case AMDGPU_RING_TYPE_KIQ:
6420                cmd = 1 << 16; /* no inc addr */
6421                break;
6422        default:
6423                cmd = WR_CONFIRM;
6424                break;
6425        }
6426
6427        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6428        amdgpu_ring_write(ring, cmd);
6429        amdgpu_ring_write(ring, reg);
6430        amdgpu_ring_write(ring, 0);
6431        amdgpu_ring_write(ring, val);
6432}
6433
6434static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6435{
6436        struct amdgpu_device *adev = ring->adev;
6437        uint32_t value = 0;
6438
6439        value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6440        value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6441        value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6442        value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6443        WREG32(mmSQ_CMD, value);
6444}
6445
6446static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6447                                                 enum amdgpu_interrupt_state state)
6448{
6449        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6450                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6451}
6452
6453static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6454                                                     int me, int pipe,
6455                                                     enum amdgpu_interrupt_state state)
6456{
6457        u32 mec_int_cntl, mec_int_cntl_reg;
6458
6459        /*
6460         * amdgpu controls only the first MEC. That's why this function only
6461         * handles the setting of interrupts for this specific MEC. All other
6462         * pipes' interrupts are set by amdkfd.
6463         */
6464
6465        if (me == 1) {
6466                switch (pipe) {
6467                case 0:
6468                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6469                        break;
6470                case 1:
6471                        mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6472                        break;
6473                case 2:
6474                        mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6475                        break;
6476                case 3:
6477                        mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6478                        break;
6479                default:
6480                        DRM_DEBUG("invalid pipe %d\n", pipe);
6481                        return;
6482                }
6483        } else {
6484                DRM_DEBUG("invalid me %d\n", me);
6485                return;
6486        }
6487
6488        switch (state) {
6489        case AMDGPU_IRQ_STATE_DISABLE:
6490                mec_int_cntl = RREG32(mec_int_cntl_reg);
6491                mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6492                WREG32(mec_int_cntl_reg, mec_int_cntl);
6493                break;
6494        case AMDGPU_IRQ_STATE_ENABLE:
6495                mec_int_cntl = RREG32(mec_int_cntl_reg);
6496                mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6497                WREG32(mec_int_cntl_reg, mec_int_cntl);
6498                break;
6499        default:
6500                break;
6501        }
6502}
6503
6504static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6505                                             struct amdgpu_irq_src *source,
6506                                             unsigned type,
6507                                             enum amdgpu_interrupt_state state)
6508{
6509        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6510                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6511
6512        return 0;
6513}
6514
6515static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6516                                              struct amdgpu_irq_src *source,
6517                                              unsigned type,
6518                                              enum amdgpu_interrupt_state state)
6519{
6520        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6521                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6522
6523        return 0;
6524}
6525
6526static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6527                                            struct amdgpu_irq_src *src,
6528                                            unsigned type,
6529                                            enum amdgpu_interrupt_state state)
6530{
6531        switch (type) {
6532        case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6533                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6534                break;
6535        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6536                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6537                break;
6538        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6539                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6540                break;
6541        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6542                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6543                break;
6544        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6545                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6546                break;
6547        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6548                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6549                break;
6550        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6551                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6552                break;
6553        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6554                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6555                break;
6556        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6557                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6558                break;
6559        default:
6560                break;
6561        }
6562        return 0;
6563}
6564
6565static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6566                                         struct amdgpu_irq_src *source,
6567                                         unsigned int type,
6568                                         enum amdgpu_interrupt_state state)
6569{
6570        int enable_flag;
6571
6572        switch (state) {
6573        case AMDGPU_IRQ_STATE_DISABLE:
6574                enable_flag = 0;
6575                break;
6576
6577        case AMDGPU_IRQ_STATE_ENABLE:
6578                enable_flag = 1;
6579                break;
6580
6581        default:
6582                return -EINVAL;
6583        }
6584
6585        WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6586        WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6587        WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6588        WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6589        WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6590        WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6591                     enable_flag);
6592        WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6593                     enable_flag);
6594        WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6595                     enable_flag);
6596        WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6597                     enable_flag);
6598        WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6599                     enable_flag);
6600        WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6601                     enable_flag);
6602        WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6603                     enable_flag);
6604        WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6605                     enable_flag);
6606
6607        return 0;
6608}
6609
6610static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6611                                     struct amdgpu_irq_src *source,
6612                                     unsigned int type,
6613                                     enum amdgpu_interrupt_state state)
6614{
6615        int enable_flag;
6616
6617        switch (state) {
6618        case AMDGPU_IRQ_STATE_DISABLE:
6619                enable_flag = 1;
6620                break;
6621
6622        case AMDGPU_IRQ_STATE_ENABLE:
6623                enable_flag = 0;
6624                break;
6625
6626        default:
6627                return -EINVAL;
6628        }
6629
6630        WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6631                     enable_flag);
6632
6633        return 0;
6634}
6635
6636static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6637                            struct amdgpu_irq_src *source,
6638                            struct amdgpu_iv_entry *entry)
6639{
6640        int i;
6641        u8 me_id, pipe_id, queue_id;
6642        struct amdgpu_ring *ring;
6643
6644        DRM_DEBUG("IH: CP EOP\n");
6645        me_id = (entry->ring_id & 0x0c) >> 2;
6646        pipe_id = (entry->ring_id & 0x03) >> 0;
6647        queue_id = (entry->ring_id & 0x70) >> 4;
6648
6649        switch (me_id) {
6650        case 0:
6651                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6652                break;
6653        case 1:
6654        case 2:
6655                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6656                        ring = &adev->gfx.compute_ring[i];
6657                        /* Per-queue interrupt is supported for MEC starting from VI.
6658                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6659                          */
6660                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6661                                amdgpu_fence_process(ring);
6662                }
6663                break;
6664        }
6665        return 0;
6666}
6667
6668static void gfx_v8_0_fault(struct amdgpu_device *adev,
6669                           struct amdgpu_iv_entry *entry)
6670{
6671        u8 me_id, pipe_id, queue_id;
6672        struct amdgpu_ring *ring;
6673        int i;
6674
6675        me_id = (entry->ring_id & 0x0c) >> 2;
6676        pipe_id = (entry->ring_id & 0x03) >> 0;
6677        queue_id = (entry->ring_id & 0x70) >> 4;
6678
6679        switch (me_id) {
6680        case 0:
6681                drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6682                break;
6683        case 1:
6684        case 2:
6685                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6686                        ring = &adev->gfx.compute_ring[i];
6687                        if (ring->me == me_id && ring->pipe == pipe_id &&
6688                            ring->queue == queue_id)
6689                                drm_sched_fault(&ring->sched);
6690                }
6691                break;
6692        }
6693}
6694
/*
 * Privileged-register fault interrupt handler: log the error, then forward
 * the interrupt entry to gfx_v8_0_fault(). Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6703
/*
 * Privileged-instruction fault interrupt handler: log the error, then
 * forward the interrupt entry to gfx_v8_0_fault(). Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6712
/*
 * CP EDC/ECC error interrupt handler: only logs the event.
 * @adev, @source and @entry are unused. Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the message so it is not merged with the next log line */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6720
6721static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6722                                  bool from_wq)
6723{
6724        u32 enc, se_id, sh_id, cu_id;
6725        char type[20];
6726        int sq_edc_source = -1;
6727
6728        enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6729        se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6730
6731        switch (enc) {
6732                case 0:
6733                        DRM_INFO("SQ general purpose intr detected:"
6734                                        "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6735                                        "host_cmd_overflow %d, cmd_timestamp %d,"
6736                                        "reg_timestamp %d, thread_trace_buff_full %d,"
6737                                        "wlt %d, thread_trace %d.\n",
6738                                        se_id,
6739                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6740                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6741                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6742                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6743                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6744                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6745                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6746                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6747                                        );
6748                        break;
6749                case 1:
6750                case 2:
6751
6752                        cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6753                        sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6754
6755                        /*
6756                         * This function can be called either directly from ISR
6757                         * or from BH in which case we can access SQ_EDC_INFO
6758                         * instance
6759                         */
6760                        if (from_wq) {
6761                                mutex_lock(&adev->grbm_idx_mutex);
6762                                gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6763
6764                                sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6765
6766                                gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6767                                mutex_unlock(&adev->grbm_idx_mutex);
6768                        }
6769
6770                        if (enc == 1)
6771                                sprintf(type, "instruction intr");
6772                        else
6773                                sprintf(type, "EDC/ECC error");
6774
6775                        DRM_INFO(
6776                                "SQ %s detected: "
6777                                        "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6778                                        "trap %s, sq_ed_info.source %s.\n",
6779                                        type, se_id, sh_id, cu_id,
6780                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6781                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6782                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6783                                        REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6784                                        (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6785                                );
6786                        break;
6787                default:
6788                        DRM_ERROR("SQ invalid encoding type\n.");
6789        }
6790}
6791
6792static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6793{
6794
6795        struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6796        struct sq_work *sq_work = container_of(work, struct sq_work, work);
6797
6798        gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6799}
6800
6801static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6802                           struct amdgpu_irq_src *source,
6803                           struct amdgpu_iv_entry *entry)
6804{
6805        unsigned ih_data = entry->src_data[0];
6806
6807        /*
6808         * Try to submit work so SQ_EDC_INFO can be accessed from
6809         * BH. If previous work submission hasn't finished yet
6810         * just print whatever info is possible directly from the ISR.
6811         */
6812        if (work_pending(&adev->gfx.sq_work.work)) {
6813                gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6814        } else {
6815                adev->gfx.sq_work.ih_data = ih_data;
6816                schedule_work(&adev->gfx.sq_work.work);
6817        }
6818
6819        return 0;
6820}
6821
6822static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6823{
6824        amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6825        amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6826                          PACKET3_TC_ACTION_ENA |
6827                          PACKET3_SH_KCACHE_ACTION_ENA |
6828                          PACKET3_SH_ICACHE_ACTION_ENA |
6829                          PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6830        amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6831        amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6832        amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6833}
6834
6835static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6836{
6837        amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6838        amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6839                          PACKET3_TC_ACTION_ENA |
6840                          PACKET3_SH_KCACHE_ACTION_ENA |
6841                          PACKET3_SH_ICACHE_ACTION_ENA |
6842                          PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6843        amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
6844        amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
6845        amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
6846        amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
6847        amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
6848}
6849
6850
6851/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6852#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT       0x0000007f
6853static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6854                                        uint32_t pipe, bool enable)
6855{
6856        uint32_t val;
6857        uint32_t wcl_cs_reg;
6858
6859        val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6860
6861        switch (pipe) {
6862        case 0:
6863                wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6864                break;
6865        case 1:
6866                wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6867                break;
6868        case 2:
6869                wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6870                break;
6871        case 3:
6872                wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6873                break;
6874        default:
6875                DRM_DEBUG("invalid pipe %d\n", pipe);
6876                return;
6877        }
6878
6879        amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6880
6881}
6882
6883#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT      0x07ffffff
6884static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6885{
6886        struct amdgpu_device *adev = ring->adev;
6887        uint32_t val;
6888        int i;
6889
6890        /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6891         * number of gfx waves. Setting 5 bit will make sure gfx only gets
6892         * around 25% of gpu resources.
6893         */
6894        val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6895        amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6896
6897        /* Restrict waves for normal/low priority compute queues as well
6898         * to get best QoS for high priority compute jobs.
6899         *
6900         * amdgpu controls only 1st ME(0-3 CS pipes).
6901         */
6902        for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6903                if (i != ring->pipe)
6904                        gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6905
6906        }
6907
6908}
6909
6910static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6911        .name = "gfx_v8_0",
6912        .early_init = gfx_v8_0_early_init,
6913        .late_init = gfx_v8_0_late_init,
6914        .sw_init = gfx_v8_0_sw_init,
6915        .sw_fini = gfx_v8_0_sw_fini,
6916        .hw_init = gfx_v8_0_hw_init,
6917        .hw_fini = gfx_v8_0_hw_fini,
6918        .suspend = gfx_v8_0_suspend,
6919        .resume = gfx_v8_0_resume,
6920        .is_idle = gfx_v8_0_is_idle,
6921        .wait_for_idle = gfx_v8_0_wait_for_idle,
6922        .check_soft_reset = gfx_v8_0_check_soft_reset,
6923        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6924        .soft_reset = gfx_v8_0_soft_reset,
6925        .post_soft_reset = gfx_v8_0_post_soft_reset,
6926        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6927        .set_powergating_state = gfx_v8_0_set_powergating_state,
6928        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6929};
6930
6931static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6932        .type = AMDGPU_RING_TYPE_GFX,
6933        .align_mask = 0xff,
6934        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6935        .support_64bit_ptrs = false,
6936        .get_rptr = gfx_v8_0_ring_get_rptr,
6937        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6938        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6939        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6940                5 +  /* COND_EXEC */
6941                7 +  /* PIPELINE_SYNC */
6942                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6943                12 +  /* FENCE for VM_FLUSH */
6944                20 + /* GDS switch */
6945                4 + /* double SWITCH_BUFFER,
6946                       the first COND_EXEC jump to the place just
6947                           prior to this double SWITCH_BUFFER  */
6948                5 + /* COND_EXEC */
6949                7 +      /*     HDP_flush */
6950                4 +      /*     VGT_flush */
6951                14 + /* CE_META */
6952                31 + /* DE_META */
6953                3 + /* CNTX_CTRL */
6954                5 + /* HDP_INVL */
6955                12 + 12 + /* FENCE x2 */
6956                2 + /* SWITCH_BUFFER */
6957                5, /* SURFACE_SYNC */
6958        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6959        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6960        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6961        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6962        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6963        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6964        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6965        .test_ring = gfx_v8_0_ring_test_ring,
6966        .test_ib = gfx_v8_0_ring_test_ib,
6967        .insert_nop = amdgpu_ring_insert_nop,
6968        .pad_ib = amdgpu_ring_generic_pad_ib,
6969        .emit_switch_buffer = gfx_v8_ring_emit_sb,
6970        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6971        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6972        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6973        .emit_wreg = gfx_v8_0_ring_emit_wreg,
6974        .soft_recovery = gfx_v8_0_ring_soft_recovery,
6975        .emit_mem_sync = gfx_v8_0_emit_mem_sync,
6976};
6977
6978static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6979        .type = AMDGPU_RING_TYPE_COMPUTE,
6980        .align_mask = 0xff,
6981        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6982        .support_64bit_ptrs = false,
6983        .get_rptr = gfx_v8_0_ring_get_rptr,
6984        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6985        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6986        .emit_frame_size =
6987                20 + /* gfx_v8_0_ring_emit_gds_switch */
6988                7 + /* gfx_v8_0_ring_emit_hdp_flush */
6989                5 + /* hdp_invalidate */
6990                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6991                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6992                7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6993                7 + /* gfx_v8_0_emit_mem_sync_compute */
6994                5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6995                15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6996        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6997        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6998        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6999        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7000        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7001        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7002        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7003        .test_ring = gfx_v8_0_ring_test_ring,
7004        .test_ib = gfx_v8_0_ring_test_ib,
7005        .insert_nop = amdgpu_ring_insert_nop,
7006        .pad_ib = amdgpu_ring_generic_pad_ib,
7007        .emit_wreg = gfx_v8_0_ring_emit_wreg,
7008        .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7009        .emit_wave_limit = gfx_v8_0_emit_wave_limit,
7010};
7011
7012static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7013        .type = AMDGPU_RING_TYPE_KIQ,
7014        .align_mask = 0xff,
7015        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7016        .support_64bit_ptrs = false,
7017        .get_rptr = gfx_v8_0_ring_get_rptr,
7018        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7019        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7020        .emit_frame_size =
7021                20 + /* gfx_v8_0_ring_emit_gds_switch */
7022                7 + /* gfx_v8_0_ring_emit_hdp_flush */
7023                5 + /* hdp_invalidate */
7024                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7025                17 + /* gfx_v8_0_ring_emit_vm_flush */
7026                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7027        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7028        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7029        .test_ring = gfx_v8_0_ring_test_ring,
7030        .insert_nop = amdgpu_ring_insert_nop,
7031        .pad_ib = amdgpu_ring_generic_pad_ib,
7032        .emit_rreg = gfx_v8_0_ring_emit_rreg,
7033        .emit_wreg = gfx_v8_0_ring_emit_wreg,
7034};
7035
7036static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7037{
7038        int i;
7039
7040        adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7041
7042        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7043                adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7044
7045        for (i = 0; i < adev->gfx.num_compute_rings; i++)
7046                adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7047}
7048
7049static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7050        .set = gfx_v8_0_set_eop_interrupt_state,
7051        .process = gfx_v8_0_eop_irq,
7052};
7053
7054static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7055        .set = gfx_v8_0_set_priv_reg_fault_state,
7056        .process = gfx_v8_0_priv_reg_irq,
7057};
7058
7059static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7060        .set = gfx_v8_0_set_priv_inst_fault_state,
7061        .process = gfx_v8_0_priv_inst_irq,
7062};
7063
7064static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7065        .set = gfx_v8_0_set_cp_ecc_int_state,
7066        .process = gfx_v8_0_cp_ecc_error_irq,
7067};
7068
7069static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7070        .set = gfx_v8_0_set_sq_int_state,
7071        .process = gfx_v8_0_sq_irq,
7072};
7073
7074static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7075{
7076        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7077        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7078
7079        adev->gfx.priv_reg_irq.num_types = 1;
7080        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7081
7082        adev->gfx.priv_inst_irq.num_types = 1;
7083        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7084
7085        adev->gfx.cp_ecc_error_irq.num_types = 1;
7086        adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7087
7088        adev->gfx.sq_irq.num_types = 1;
7089        adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7090}
7091
7092static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7093{
7094        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7095}
7096
7097static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7098{
7099        /* init asci gds info */
7100        adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7101        adev->gds.gws_size = 64;
7102        adev->gds.oa_size = 16;
7103        adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7104}
7105
7106static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7107                                                 u32 bitmap)
7108{
7109        u32 data;
7110
7111        if (!bitmap)
7112                return;
7113
7114        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7115        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7116
7117        WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7118}
7119
7120static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7121{
7122        u32 data, mask;
7123
7124        data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7125                RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7126
7127        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7128
7129        return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7130}
7131
7132static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7133{
7134        int i, j, k, counter, active_cu_number = 0;
7135        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7136        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7137        unsigned disable_masks[4 * 2];
7138        u32 ao_cu_num;
7139
7140        memset(cu_info, 0, sizeof(*cu_info));
7141
7142        if (adev->flags & AMD_IS_APU)
7143                ao_cu_num = 2;
7144        else
7145                ao_cu_num = adev->gfx.config.max_cu_per_sh;
7146
7147        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7148
7149        mutex_lock(&adev->grbm_idx_mutex);
7150        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7151                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7152                        mask = 1;
7153                        ao_bitmap = 0;
7154                        counter = 0;
7155                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7156                        if (i < 4 && j < 2)
7157                                gfx_v8_0_set_user_cu_inactive_bitmap(
7158                                        adev, disable_masks[i * 2 + j]);
7159                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7160                        cu_info->bitmap[i][j] = bitmap;
7161
7162                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7163                                if (bitmap & mask) {
7164                                        if (counter < ao_cu_num)
7165                                                ao_bitmap |= mask;
7166                                        counter ++;
7167                                }
7168                                mask <<= 1;
7169                        }
7170                        active_cu_number += counter;
7171                        if (i < 2 && j < 2)
7172                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7173                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7174                }
7175        }
7176        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7177        mutex_unlock(&adev->grbm_idx_mutex);
7178
7179        cu_info->number = active_cu_number;
7180        cu_info->ao_cu_mask = ao_cu_mask;
7181        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7182        cu_info->max_waves_per_simd = 10;
7183        cu_info->max_scratch_slots_per_cu = 32;
7184        cu_info->wave_front_size = 64;
7185        cu_info->lds_size = 64;
7186}
7187
7188const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7189{
7190        .type = AMD_IP_BLOCK_TYPE_GFX,
7191        .major = 8,
7192        .minor = 0,
7193        .rev = 0,
7194        .funcs = &gfx_v8_0_ip_funcs,
7195};
7196
7197const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7198{
7199        .type = AMD_IP_BLOCK_TYPE_GFX,
7200        .major = 8,
7201        .minor = 1,
7202        .rev = 0,
7203        .funcs = &gfx_v8_0_ip_funcs,
7204};
7205
7206static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7207{
7208        uint64_t ce_payload_addr;
7209        int cnt_ce;
7210        union {
7211                struct vi_ce_ib_state regular;
7212                struct vi_ce_ib_state_chained_ib chained;
7213        } ce_payload = {};
7214
7215        if (ring->adev->virt.chained_ib_support) {
7216                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7217                        offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7218                cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7219        } else {
7220                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7221                        offsetof(struct vi_gfx_meta_data, ce_payload);
7222                cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7223        }
7224
7225        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7226        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7227                                WRITE_DATA_DST_SEL(8) |
7228                                WR_CONFIRM) |
7229                                WRITE_DATA_CACHE_POLICY(0));
7230        amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7231        amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7232        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7233}
7234
7235static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7236{
7237        uint64_t de_payload_addr, gds_addr, csa_addr;
7238        int cnt_de;
7239        union {
7240                struct vi_de_ib_state regular;
7241                struct vi_de_ib_state_chained_ib chained;
7242        } de_payload = {};
7243
7244        csa_addr = amdgpu_csa_vaddr(ring->adev);
7245        gds_addr = csa_addr + 4096;
7246        if (ring->adev->virt.chained_ib_support) {
7247                de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7248                de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7249                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7250                cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7251        } else {
7252                de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7253                de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7254                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7255                cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7256        }
7257
7258        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7259        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7260                                WRITE_DATA_DST_SEL(8) |
7261                                WR_CONFIRM) |
7262                                WRITE_DATA_CACHE_POLICY(0));
7263        amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7264        amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7265        amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7266}
7267