1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/kernel.h>
24#include <linux/firmware.h>
25#include <drm/drmP.h>
26#include "amdgpu.h"
27#include "amdgpu_gfx.h"
28#include "vi.h"
29#include "vi_structs.h"
30#include "vid.h"
31#include "amdgpu_ucode.h"
32#include "amdgpu_atombios.h"
33#include "atombios_i2c.h"
34#include "clearstate_vi.h"
35
36#include "gmc/gmc_8_2_d.h"
37#include "gmc/gmc_8_2_sh_mask.h"
38
39#include "oss/oss_3_0_d.h"
40#include "oss/oss_3_0_sh_mask.h"
41
42#include "bif/bif_5_0_d.h"
43#include "bif/bif_5_0_sh_mask.h"
44#include "gca/gfx_8_0_d.h"
45#include "gca/gfx_8_0_enum.h"
46#include "gca/gfx_8_0_sh_mask.h"
47#include "gca/gfx_8_0_enum.h"
48
49#include "dce/dce_10_0_d.h"
50#include "dce/dce_10_0_sh_mask.h"
51
52#include "smu/smu_7_1_3_d.h"
53
/* Number of GFX rings exposed by this GFX8 implementation. */
#define GFX8_NUM_GFX_RINGS 1
/* Size in bytes of one MEC HPD (hardware queue descriptor) slot.
 * NOTE(review): the consumer is outside this chunk — confirm against the
 * compute queue setup code. */
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC golden values for mmGB_ADDR_CONFIG; the *_golden_common_all
 * tables below program the same values. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers that shift a field value into its position within the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layouts. */
#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Field masks for the RLC_CGTT_MGCG_OVERRIDE register (clock-gating
 * override bits handled by the RLC). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L


/* Set/clear command codes for BPM serdes programming.
 * NOTE(review): the users of these constants are outside this chunk. */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0


/* Indices of the per-CU BPM register types; BPM_REG_FGCG_MAX doubles as
 * the number of entries. */
enum {
	BPM_REG_CGLS_EN = 0,	/* clock-gating light-sleep enable */
	BPM_REG_CGLS_ON,
	BPM_REG_CGCG_OVERRIDE,
	BPM_REG_MGCG_OVERRIDE,
	BPM_REG_FGCG_OVERRIDE,
	BPM_REG_FGCG_MAX
};

/* Length of the RLC "format direct register list".
 * NOTE(review): must match the RLC firmware's list format — confirm against
 * the RLC ucode definition. */
#define RLC_FormatDirectRegListLength 14
94
/*
 * Firmware images this IP block may request at runtime (CE/PFP/ME front-end
 * microcode, MEC compute microcode and RLC), declared per supported ASIC so
 * userspace tooling can bundle them (e.g. into an initramfs).  The Polaris
 * parts additionally list "_2" variants.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
170
/*
 * Per-VMID GDS register offsets: one {BASE, SIZE, GWS, OA} entry for each
 * of the 16 VMIDs, indexed by VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
190
/*
 * Tonga golden register fixups: {register offset, AND mask, value} triples
 * applied via amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().  Values are hardware-validated and must
 * not be altered.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Tonga common golden settings (raster config, GB_ADDR_CONFIG = the
 * TONGA_GB_ADDR_CONFIG_GOLDEN value, SPI CU reservations). */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* Tonga MGCG/CGCG (clock-gating) init sequence: per-block CGTT clock
 * controls and per-CU CGTS settings.  Programmed before the other Tonga
 * golden tables. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
301
/*
 * VegaM golden register fixups: {register offset, AND mask, value} triples
 * applied via amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().  VegaM programs no separate clock-gating
 * init table (see the CHIP_VEGAM case there).
 */
static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* VegaM common golden settings (GB_ADDR_CONFIG, SPI CU reservations). */
static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
332
/*
 * Polaris11 golden register fixups: {register offset, AND mask, value}
 * triples applied via amdgpu_device_program_register_sequence().  These
 * tables are shared by CHIP_POLARIS11 and CHIP_POLARIS12 (see
 * gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris11/12 common golden settings; GB_ADDR_CONFIG matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
363
/*
 * Polaris10 golden register fixups: {register offset, AND mask, value}
 * triples applied via amdgpu_device_program_register_sequence() from the
 * CHIP_POLARIS10 case of gfx_v8_0_init_golden_registers().
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris10 common golden settings (raster config, GB_ADDR_CONFIG,
 * SPI CU reservations). */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
396
/*
 * Fiji common golden settings: {register offset, AND mask, value} triples
 * applied via amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

/* Fiji revision-specific golden register fixups. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Fiji MGCG/CGCG (clock-gating) init sequence; programmed before the other
 * Fiji golden tables. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
464
/*
 * Iceland (CHIP_TOPAZ) golden register fixups: {register offset, AND mask,
 * value} triples applied via amdgpu_device_program_register_sequence() from
 * the CHIP_TOPAZ case of gfx_v8_0_init_golden_registers().
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Iceland common golden settings; GB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* Iceland MGCG/CGCG (clock-gating) init sequence; covers CU0-CU5 only
 * (fewer CUs than the Tonga/Carrizo tables). */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
564
/*
 * Carrizo golden register fixups: {register offset, AND mask, value}
 * triples applied via amdgpu_device_program_register_sequence() from the
 * CHIP_CARRIZO case of gfx_v8_0_init_golden_registers().
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Carrizo common golden settings; GB_ADDR_CONFIG matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* Carrizo MGCG/CGCG (clock-gating) init sequence; programmed before the
 * other Carrizo golden tables. */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
671
/*
 * Stoney golden register fixups: {register offset, AND mask, value}
 * triples applied via amdgpu_device_program_register_sequence() from the
 * CHIP_STONEY case of gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

/* Stoney common golden settings (raster config, GB_ADDR_CONFIG,
 * SPI CU reservations). */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

/* Stoney MGCG/CGCG (clock-gating) init sequence — much shorter than the
 * discrete-GPU tables; includes memory light-sleep (MEM_SLP) setup. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
706
/* Forward declarations for helpers and callback installers defined later
 * in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
715
/*
 * gfx_v8_0_init_golden_registers - program the ASIC-specific "golden"
 * register settings for the GFX8 block.
 *
 * For each supported ASIC the tables above are written in a fixed order:
 * the MGCG/CGCG clock-gating init sequence (where one exists for the chip),
 * then the revision-specific fixups, then the common settings.  VEGAM and
 * the Polaris parts program no clock-gating init table here.  Unknown ASIC
 * types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		/* Topaz uses the Iceland tables. */
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* Polaris12 shares the Polaris11 tables. */
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * NOTE(review): board-specific quirk.  Certain Polaris10
		 * boards (revision 0xc7 with the subsystem IDs below) get two
		 * i2c register writes; the payload semantics are not visible
		 * in this file — presumably a board-level workaround, confirm
		 * against the board schematics/vendor errata.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
812
813static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
814{
815 adev->gfx.scratch.num_reg = 8;
816 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
817 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
818}
819
820static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
821{
822 struct amdgpu_device *adev = ring->adev;
823 uint32_t scratch;
824 uint32_t tmp = 0;
825 unsigned i;
826 int r;
827
828 r = amdgpu_gfx_scratch_get(adev, &scratch);
829 if (r) {
830 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
831 return r;
832 }
833 WREG32(scratch, 0xCAFEDEAD);
834 r = amdgpu_ring_alloc(ring, 3);
835 if (r) {
836 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
837 ring->idx, r);
838 amdgpu_gfx_scratch_free(adev, scratch);
839 return r;
840 }
841 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
842 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
843 amdgpu_ring_write(ring, 0xDEADBEEF);
844 amdgpu_ring_commit(ring);
845
846 for (i = 0; i < adev->usec_timeout; i++) {
847 tmp = RREG32(scratch);
848 if (tmp == 0xDEADBEEF)
849 break;
850 DRM_UDELAY(1);
851 }
852 if (i < adev->usec_timeout) {
853 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
854 ring->idx, i);
855 } else {
856 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
857 ring->idx, scratch, tmp);
858 r = -EINVAL;
859 }
860 amdgpu_gfx_scratch_free(adev, scratch);
861 return r;
862}
863
864static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
865{
866 struct amdgpu_device *adev = ring->adev;
867 struct amdgpu_ib ib;
868 struct dma_fence *f = NULL;
869 uint32_t scratch;
870 uint32_t tmp = 0;
871 long r;
872
873 r = amdgpu_gfx_scratch_get(adev, &scratch);
874 if (r) {
875 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
876 return r;
877 }
878 WREG32(scratch, 0xCAFEDEAD);
879 memset(&ib, 0, sizeof(ib));
880 r = amdgpu_ib_get(adev, NULL, 256, &ib);
881 if (r) {
882 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
883 goto err1;
884 }
885 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
886 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
887 ib.ptr[2] = 0xDEADBEEF;
888 ib.length_dw = 3;
889
890 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
891 if (r)
892 goto err2;
893
894 r = dma_fence_wait_timeout(f, false, timeout);
895 if (r == 0) {
896 DRM_ERROR("amdgpu: IB test timed out.\n");
897 r = -ETIMEDOUT;
898 goto err2;
899 } else if (r < 0) {
900 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
901 goto err2;
902 }
903 tmp = RREG32(scratch);
904 if (tmp == 0xDEADBEEF) {
905 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
906 r = 0;
907 } else {
908 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
909 scratch, tmp);
910 r = -EINVAL;
911 }
912err2:
913 amdgpu_ib_free(adev, &ib, NULL);
914 dma_fence_put(f);
915err1:
916 amdgpu_gfx_scratch_free(adev, scratch);
917 return r;
918}
919
920
921static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
922{
923 release_firmware(adev->gfx.pfp_fw);
924 adev->gfx.pfp_fw = NULL;
925 release_firmware(adev->gfx.me_fw);
926 adev->gfx.me_fw = NULL;
927 release_firmware(adev->gfx.ce_fw);
928 adev->gfx.ce_fw = NULL;
929 release_firmware(adev->gfx.rlc_fw);
930 adev->gfx.rlc_fw = NULL;
931 release_firmware(adev->gfx.mec_fw);
932 adev->gfx.mec_fw = NULL;
933 if ((adev->asic_type != CHIP_STONEY) &&
934 (adev->asic_type != CHIP_TOPAZ))
935 release_firmware(adev->gfx.mec2_fw);
936 adev->gfx.mec2_fw = NULL;
937
938 kfree(adev->gfx.rlc.register_list_format);
939}
940
941static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
942{
943 const char *chip_name;
944 char fw_name[30];
945 int err;
946 struct amdgpu_firmware_info *info = NULL;
947 const struct common_firmware_header *header = NULL;
948 const struct gfx_firmware_header_v1_0 *cp_hdr;
949 const struct rlc_firmware_header_v2_0 *rlc_hdr;
950 unsigned int *tmp = NULL, i;
951
952 DRM_DEBUG("\n");
953
954 switch (adev->asic_type) {
955 case CHIP_TOPAZ:
956 chip_name = "topaz";
957 break;
958 case CHIP_TONGA:
959 chip_name = "tonga";
960 break;
961 case CHIP_CARRIZO:
962 chip_name = "carrizo";
963 break;
964 case CHIP_FIJI:
965 chip_name = "fiji";
966 break;
967 case CHIP_STONEY:
968 chip_name = "stoney";
969 break;
970 case CHIP_POLARIS10:
971 chip_name = "polaris10";
972 break;
973 case CHIP_POLARIS11:
974 chip_name = "polaris11";
975 break;
976 case CHIP_POLARIS12:
977 chip_name = "polaris12";
978 break;
979 case CHIP_VEGAM:
980 chip_name = "vegam";
981 break;
982 default:
983 BUG();
984 }
985
986 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
987 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
988 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
989 if (err == -ENOENT) {
990 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
991 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
992 }
993 } else {
994 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
995 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996 }
997 if (err)
998 goto out;
999 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1000 if (err)
1001 goto out;
1002 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1003 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1004 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1005
1006 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1007 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1008 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1009 if (err == -ENOENT) {
1010 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1011 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1012 }
1013 } else {
1014 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016 }
1017 if (err)
1018 goto out;
1019 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1020 if (err)
1021 goto out;
1022 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1023 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024
1025 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1026
1027 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1028 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1029 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1030 if (err == -ENOENT) {
1031 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1032 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1033 }
1034 } else {
1035 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037 }
1038 if (err)
1039 goto out;
1040 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1041 if (err)
1042 goto out;
1043 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1044 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1045 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1046
1047
1048
1049
1050
1051 if (adev->gfx.ce_feature_version >= 46 &&
1052 adev->gfx.pfp_feature_version >= 46) {
1053 adev->virt.chained_ib_support = true;
1054 DRM_INFO("Chained IB support enabled!\n");
1055 } else
1056 adev->virt.chained_ib_support = false;
1057
1058 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1059 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1060 if (err)
1061 goto out;
1062 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1063 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1064 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1065 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1066
1067 adev->gfx.rlc.save_and_restore_offset =
1068 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1069 adev->gfx.rlc.clear_state_descriptor_offset =
1070 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1071 adev->gfx.rlc.avail_scratch_ram_locations =
1072 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1073 adev->gfx.rlc.reg_restore_list_size =
1074 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1075 adev->gfx.rlc.reg_list_format_start =
1076 le32_to_cpu(rlc_hdr->reg_list_format_start);
1077 adev->gfx.rlc.reg_list_format_separate_start =
1078 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1079 adev->gfx.rlc.starting_offsets_start =
1080 le32_to_cpu(rlc_hdr->starting_offsets_start);
1081 adev->gfx.rlc.reg_list_format_size_bytes =
1082 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1083 adev->gfx.rlc.reg_list_size_bytes =
1084 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1085
1086 adev->gfx.rlc.register_list_format =
1087 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1088 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1089
1090 if (!adev->gfx.rlc.register_list_format) {
1091 err = -ENOMEM;
1092 goto out;
1093 }
1094
1095 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1096 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1097 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1098 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1099
1100 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1101
1102 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1103 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1104 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1105 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1106
1107 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1108 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1109 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1110 if (err == -ENOENT) {
1111 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1112 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1113 }
1114 } else {
1115 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117 }
1118 if (err)
1119 goto out;
1120 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1121 if (err)
1122 goto out;
1123 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1124 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1125 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1126
1127 if ((adev->asic_type != CHIP_STONEY) &&
1128 (adev->asic_type != CHIP_TOPAZ)) {
1129 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1130 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1131 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1132 if (err == -ENOENT) {
1133 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1134 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1135 }
1136 } else {
1137 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139 }
1140 if (!err) {
1141 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1142 if (err)
1143 goto out;
1144 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1145 adev->gfx.mec2_fw->data;
1146 adev->gfx.mec2_fw_version =
1147 le32_to_cpu(cp_hdr->header.ucode_version);
1148 adev->gfx.mec2_feature_version =
1149 le32_to_cpu(cp_hdr->ucode_feature_version);
1150 } else {
1151 err = 0;
1152 adev->gfx.mec2_fw = NULL;
1153 }
1154 }
1155
1156 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1157 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1158 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1159 info->fw = adev->gfx.pfp_fw;
1160 header = (const struct common_firmware_header *)info->fw->data;
1161 adev->firmware.fw_size +=
1162 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1163
1164 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1165 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1166 info->fw = adev->gfx.me_fw;
1167 header = (const struct common_firmware_header *)info->fw->data;
1168 adev->firmware.fw_size +=
1169 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170
1171 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1172 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1173 info->fw = adev->gfx.ce_fw;
1174 header = (const struct common_firmware_header *)info->fw->data;
1175 adev->firmware.fw_size +=
1176 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177
1178 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1179 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1180 info->fw = adev->gfx.rlc_fw;
1181 header = (const struct common_firmware_header *)info->fw->data;
1182 adev->firmware.fw_size +=
1183 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184
1185 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1186 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1187 info->fw = adev->gfx.mec_fw;
1188 header = (const struct common_firmware_header *)info->fw->data;
1189 adev->firmware.fw_size +=
1190 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191
1192
1193 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1194 adev->firmware.fw_size +=
1195 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1196
1197 if (amdgpu_sriov_vf(adev)) {
1198 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1199 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1200 info->fw = adev->gfx.mec_fw;
1201 adev->firmware.fw_size +=
1202 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1203 }
1204
1205 if (adev->gfx.mec2_fw) {
1206 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1207 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1208 info->fw = adev->gfx.mec2_fw;
1209 header = (const struct common_firmware_header *)info->fw->data;
1210 adev->firmware.fw_size +=
1211 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1212 }
1213
1214 }
1215
1216out:
1217 if (err) {
1218 dev_err(adev->dev,
1219 "gfx8: Failed to load firmware \"%s\"\n",
1220 fw_name);
1221 release_firmware(adev->gfx.pfp_fw);
1222 adev->gfx.pfp_fw = NULL;
1223 release_firmware(adev->gfx.me_fw);
1224 adev->gfx.me_fw = NULL;
1225 release_firmware(adev->gfx.ce_fw);
1226 adev->gfx.ce_fw = NULL;
1227 release_firmware(adev->gfx.rlc_fw);
1228 adev->gfx.rlc_fw = NULL;
1229 release_firmware(adev->gfx.mec_fw);
1230 adev->gfx.mec_fw = NULL;
1231 release_firmware(adev->gfx.mec2_fw);
1232 adev->gfx.mec2_fw = NULL;
1233 }
1234 return err;
1235}
1236
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state buffer (CSB)
 * @buffer: mapped BO to fill (dword granularity)
 *
 * Emits, in order: PREAMBLE begin, CONTEXT_CONTROL, one SET_CONTEXT_REG
 * packet per SECT_CONTEXT extent in adev->gfx.rlc.cs_data, the raster
 * config pair, PREAMBLE end, and a final CLEAR_STATE packet.  The dword
 * count produced here must match gfx_v8_0_get_csb_size().
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* Only context-register sections can be
				 * expressed here; bail on anything else,
				 * leaving the buffer truncated. */
				return;
			}
		}
	}

	/* Harvest-dependent raster configuration for SE0/SH0. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1283
1284static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1285{
1286 const __le32 *fw_data;
1287 volatile u32 *dst_ptr;
1288 int me, i, max_me = 4;
1289 u32 bo_offset = 0;
1290 u32 table_offset, table_size;
1291
1292 if (adev->asic_type == CHIP_CARRIZO)
1293 max_me = 5;
1294
1295
1296 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1297 for (me = 0; me < max_me; me++) {
1298 if (me == 0) {
1299 const struct gfx_firmware_header_v1_0 *hdr =
1300 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1301 fw_data = (const __le32 *)
1302 (adev->gfx.ce_fw->data +
1303 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1304 table_offset = le32_to_cpu(hdr->jt_offset);
1305 table_size = le32_to_cpu(hdr->jt_size);
1306 } else if (me == 1) {
1307 const struct gfx_firmware_header_v1_0 *hdr =
1308 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1309 fw_data = (const __le32 *)
1310 (adev->gfx.pfp_fw->data +
1311 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1312 table_offset = le32_to_cpu(hdr->jt_offset);
1313 table_size = le32_to_cpu(hdr->jt_size);
1314 } else if (me == 2) {
1315 const struct gfx_firmware_header_v1_0 *hdr =
1316 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1317 fw_data = (const __le32 *)
1318 (adev->gfx.me_fw->data +
1319 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1320 table_offset = le32_to_cpu(hdr->jt_offset);
1321 table_size = le32_to_cpu(hdr->jt_size);
1322 } else if (me == 3) {
1323 const struct gfx_firmware_header_v1_0 *hdr =
1324 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1325 fw_data = (const __le32 *)
1326 (adev->gfx.mec_fw->data +
1327 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1328 table_offset = le32_to_cpu(hdr->jt_offset);
1329 table_size = le32_to_cpu(hdr->jt_size);
1330 } else if (me == 4) {
1331 const struct gfx_firmware_header_v1_0 *hdr =
1332 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1333 fw_data = (const __le32 *)
1334 (adev->gfx.mec2_fw->data +
1335 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1336 table_offset = le32_to_cpu(hdr->jt_offset);
1337 table_size = le32_to_cpu(hdr->jt_size);
1338 }
1339
1340 for (i = 0; i < table_size; i ++) {
1341 dst_ptr[bo_offset + i] =
1342 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1343 }
1344
1345 bo_offset += table_size;
1346 }
1347}
1348
/* Free the clear-state and CP jump-table BOs created by
 * gfx_v8_0_rlc_init().  amdgpu_bo_free_kernel() tolerates NULL objects,
 * so partial initialization is handled. */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1354
/*
 * gfx_v8_0_rlc_init - allocate RLC buffer objects
 *
 * Creates the clear-state BO (sized via gfx_v8_0_get_csb_size()) in
 * VRAM and fills it with the CSB packet stream, then — on Carrizo and
 * Stoney only — creates the CP jump-table BO and packs the CP jump
 * tables into it.  Both BOs are unmapped and unreserved before return.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block: one BO sized to the CSB dword count */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* write the CSB while the BO is still mapped */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		/* jump tables (aligned) plus 64KB of extra space —
		 * NOTE(review): presumably GDS backup area; confirm. */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024);
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			/* NOTE(review): unlike the path above, this return
			 * does not call gfx_v8_0_rlc_fini(), leaving the
			 * clear-state BO for the caller's teardown to free
			 * — verify the caller does. */
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1409
/* Free the MEC HPD EOP buffer object created by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1414
1415static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1416{
1417 int r;
1418 u32 *hpd;
1419 size_t mec_hpd_size;
1420
1421 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1422
1423
1424 amdgpu_gfx_compute_queue_acquire(adev);
1425
1426 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1427
1428 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1429 AMDGPU_GEM_DOMAIN_GTT,
1430 &adev->gfx.mec.hpd_eop_obj,
1431 &adev->gfx.mec.hpd_eop_gpu_addr,
1432 (void **)&hpd);
1433 if (r) {
1434 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1435 return r;
1436 }
1437
1438 memset(hpd, 0, mec_hpd_size);
1439
1440 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1441 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1442
1443 return 0;
1444}
1445
/* Hand-assembled GFX8 compute shader used by the EDC GPR workaround to
 * touch the VGPR file (dispatched via vgpr_init_regs).  Raw ISA dwords
 * — do not edit by hand; NOTE(review): presumably a run of v_mov_b32
 * writes terminated by s_barrier/s_endpgm — confirm against the GCN3
 * ISA manual before changing. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1482
/* Hand-assembled GFX8 compute shader used by the EDC GPR workaround to
 * touch the SGPR file (dispatched twice, via sgpr1_init_regs and
 * sgpr2_init_regs).  Raw ISA dwords — do not edit by hand; verify any
 * change against the GCN3 ISA manual. */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1507
/* COMPUTE_* register/value pairs (consumed pairwise by
 * gfx_v8_0_do_edc_gpr_workarounds() as SET_SH_REG packets) that set up
 * the dispatch of vgpr_init_compute_shader: all CUs of SE0 enabled,
 * 1024-thread (256*4) 1D workgroups. */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1528
/* Register/value pairs for the first SGPR-init dispatch: CU mask 0x0f
 * (lower half of SE0), 1280-thread (256*5) workgroups running
 * sgpr_init_compute_shader. */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1549
/* Register/value pairs for the second SGPR-init dispatch: identical to
 * sgpr1_init_regs except for CU mask 0xf0 (upper half of SE0), so both
 * halves of the SGPR file get covered. */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1570
/* EDC SEC/DED error-counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() — NOTE(review): presumably the
 * read clears the counters (read-to-clear semantics); confirm against
 * the register spec. */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1599
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs for EDC (Carrizo only)
 *
 * Builds a single IB that performs three compute dispatches — one VGPR
 * init and two SGPR inits (lower/upper CU halves) — each followed by a
 * pipeline-flush EVENT_WRITE, submits it on compute ring 0 and waits for
 * completion.  Afterwards EDC/DED modes are enabled and every SEC/DED
 * counter register is read back.
 *
 * The packet counts in the total_size arithmetic below are coupled to
 * the exact packets emitted later: per dispatch, 3 dwords per reg pair,
 * + 4 (SET_SH_REG of COMPUTE_PGM_LO/HI) + 5 (DISPATCH_DIRECT) + 2
 * (EVENT_WRITE).  Keep them in sync when editing.
 *
 * Returns 0 on success (or when not applicable), negative error code on
 * IB allocation/submission/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only Carrizo needs this workaround */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* nothing to do if the compute ring isn't up yet */
	if (!ring->ready)
		return 0;

	/* disable EDC while the init shaders run; restored (modified) below */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* size the IB: three dispatch sequences + the two shader blobs */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);	/* shader code is 256-byte aligned */
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an IB large enough for packets + embedded shaders */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* copy both shader blobs into the tail of the IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 to make sure the packets start at dword 0 */
	ib.length_dw = 0;

	/* ---- dispatch 1: VGPR init ---- */

	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}

	/* point COMPUTE_PGM_LO/HI at the embedded VGPR shader (256B units) */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* dispatch an 8x1x1 grid of workgroups */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8;	/* x */
	ib.ptr[ib.length_dw++] = 1;	/* y */
	ib.ptr[ib.length_dw++] = 1;	/* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* flush the pipeline before the next dispatch */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* ---- dispatch 2: SGPR init, lower CU half ---- */

	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}

	/* point COMPUTE_PGM_LO/HI at the embedded SGPR shader */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* dispatch an 8x1x1 grid of workgroups */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8;	/* x */
	ib.ptr[ib.length_dw++] = 1;	/* y */
	ib.ptr[ib.length_dw++] = 1;	/* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* flush the pipeline before the next dispatch */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* ---- dispatch 3: SGPR init, upper CU half ---- */

	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}

	/* same SGPR shader, different CU mask */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* dispatch an 8x1x1 grid of workgroups */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8;	/* x */
	ib.ptr[ib.length_dw++] = 1;	/* y */
	ib.ptr[ib.length_dw++] = 1;	/* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* final pipeline flush */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* submit the whole IB and wait for it to complete */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* no timeout here: the workaround must finish before EDC is enabled */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED/FED propagation on top of the saved value */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the trailing "| 1" sets bit 0 unconditionally on
	 * top of REG_SET_FIELD — looks intentional but undocumented;
	 * confirm against the CC_GC_EDC_CONFIG register spec. */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back all SEC/DED counters — presumably read-to-clear */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1762
1763static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1764{
1765 u32 gb_addr_config;
1766 u32 mc_shared_chmap, mc_arb_ramcfg;
1767 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1768 u32 tmp;
1769 int ret;
1770
1771 switch (adev->asic_type) {
1772 case CHIP_TOPAZ:
1773 adev->gfx.config.max_shader_engines = 1;
1774 adev->gfx.config.max_tile_pipes = 2;
1775 adev->gfx.config.max_cu_per_sh = 6;
1776 adev->gfx.config.max_sh_per_se = 1;
1777 adev->gfx.config.max_backends_per_se = 2;
1778 adev->gfx.config.max_texture_channel_caches = 2;
1779 adev->gfx.config.max_gprs = 256;
1780 adev->gfx.config.max_gs_threads = 32;
1781 adev->gfx.config.max_hw_contexts = 8;
1782
1783 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1784 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1785 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1786 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1787 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1788 break;
1789 case CHIP_FIJI:
1790 adev->gfx.config.max_shader_engines = 4;
1791 adev->gfx.config.max_tile_pipes = 16;
1792 adev->gfx.config.max_cu_per_sh = 16;
1793 adev->gfx.config.max_sh_per_se = 1;
1794 adev->gfx.config.max_backends_per_se = 4;
1795 adev->gfx.config.max_texture_channel_caches = 16;
1796 adev->gfx.config.max_gprs = 256;
1797 adev->gfx.config.max_gs_threads = 32;
1798 adev->gfx.config.max_hw_contexts = 8;
1799
1800 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1801 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1802 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1803 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1804 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1805 break;
1806 case CHIP_POLARIS11:
1807 case CHIP_POLARIS12:
1808 ret = amdgpu_atombios_get_gfx_info(adev);
1809 if (ret)
1810 return ret;
1811 adev->gfx.config.max_gprs = 256;
1812 adev->gfx.config.max_gs_threads = 32;
1813 adev->gfx.config.max_hw_contexts = 8;
1814
1815 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1816 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1817 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1818 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1819 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1820 break;
1821 case CHIP_POLARIS10:
1822 case CHIP_VEGAM:
1823 ret = amdgpu_atombios_get_gfx_info(adev);
1824 if (ret)
1825 return ret;
1826 adev->gfx.config.max_gprs = 256;
1827 adev->gfx.config.max_gs_threads = 32;
1828 adev->gfx.config.max_hw_contexts = 8;
1829
1830 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1831 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1832 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1833 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1834 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1835 break;
1836 case CHIP_TONGA:
1837 adev->gfx.config.max_shader_engines = 4;
1838 adev->gfx.config.max_tile_pipes = 8;
1839 adev->gfx.config.max_cu_per_sh = 8;
1840 adev->gfx.config.max_sh_per_se = 1;
1841 adev->gfx.config.max_backends_per_se = 2;
1842 adev->gfx.config.max_texture_channel_caches = 8;
1843 adev->gfx.config.max_gprs = 256;
1844 adev->gfx.config.max_gs_threads = 32;
1845 adev->gfx.config.max_hw_contexts = 8;
1846
1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1852 break;
1853 case CHIP_CARRIZO:
1854 adev->gfx.config.max_shader_engines = 1;
1855 adev->gfx.config.max_tile_pipes = 2;
1856 adev->gfx.config.max_sh_per_se = 1;
1857 adev->gfx.config.max_backends_per_se = 2;
1858 adev->gfx.config.max_cu_per_sh = 8;
1859 adev->gfx.config.max_texture_channel_caches = 2;
1860 adev->gfx.config.max_gprs = 256;
1861 adev->gfx.config.max_gs_threads = 32;
1862 adev->gfx.config.max_hw_contexts = 8;
1863
1864 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1868 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1869 break;
1870 case CHIP_STONEY:
1871 adev->gfx.config.max_shader_engines = 1;
1872 adev->gfx.config.max_tile_pipes = 2;
1873 adev->gfx.config.max_sh_per_se = 1;
1874 adev->gfx.config.max_backends_per_se = 1;
1875 adev->gfx.config.max_cu_per_sh = 3;
1876 adev->gfx.config.max_texture_channel_caches = 2;
1877 adev->gfx.config.max_gprs = 256;
1878 adev->gfx.config.max_gs_threads = 16;
1879 adev->gfx.config.max_hw_contexts = 8;
1880
1881 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1882 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1883 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1884 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1885 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1886 break;
1887 default:
1888 adev->gfx.config.max_shader_engines = 2;
1889 adev->gfx.config.max_tile_pipes = 4;
1890 adev->gfx.config.max_cu_per_sh = 2;
1891 adev->gfx.config.max_sh_per_se = 1;
1892 adev->gfx.config.max_backends_per_se = 2;
1893 adev->gfx.config.max_texture_channel_caches = 4;
1894 adev->gfx.config.max_gprs = 256;
1895 adev->gfx.config.max_gs_threads = 32;
1896 adev->gfx.config.max_hw_contexts = 8;
1897
1898 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1902 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1903 break;
1904 }
1905
1906 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1907 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1908 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1909
1910 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1911 adev->gfx.config.mem_max_burst_length_bytes = 256;
1912 if (adev->flags & AMD_IS_APU) {
1913
1914 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1915 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1916 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1917
1918 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1919 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1920 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1921
1922
1923 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1924 dimm00_addr_map = 0;
1925 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1926 dimm01_addr_map = 0;
1927 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1928 dimm10_addr_map = 0;
1929 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1930 dimm11_addr_map = 0;
1931
1932
1933
1934 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1935 adev->gfx.config.mem_row_size_in_kb = 2;
1936 else
1937 adev->gfx.config.mem_row_size_in_kb = 1;
1938 } else {
1939 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1940 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1941 if (adev->gfx.config.mem_row_size_in_kb > 4)
1942 adev->gfx.config.mem_row_size_in_kb = 4;
1943 }
1944
1945 adev->gfx.config.shader_engine_tile_size = 32;
1946 adev->gfx.config.num_gpus = 1;
1947 adev->gfx.config.multi_gpu_tile_size = 64;
1948
1949
1950 switch (adev->gfx.config.mem_row_size_in_kb) {
1951 case 1:
1952 default:
1953 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1954 break;
1955 case 2:
1956 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1957 break;
1958 case 4:
1959 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1960 break;
1961 }
1962 adev->gfx.config.gb_addr_config = gb_addr_config;
1963
1964 return 0;
1965}
1966
1967static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1968 int mec, int pipe, int queue)
1969{
1970 int r;
1971 unsigned irq_type;
1972 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1973
1974 ring = &adev->gfx.compute_ring[ring_id];
1975
1976
1977 ring->me = mec + 1;
1978 ring->pipe = pipe;
1979 ring->queue = queue;
1980
1981 ring->ring_obj = NULL;
1982 ring->use_doorbell = true;
1983 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1984 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1985 + (ring_id * GFX8_MEC_HPD_SIZE);
1986 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1987
1988 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1989 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1990 + ring->pipe;
1991
1992
1993 r = amdgpu_ring_init(adev, ring, 1024,
1994 &adev->gfx.eop_irq, irq_type);
1995 if (r)
1996 return r;
1997
1998
1999 return 0;
2000}
2001
/*
 * gfx_v8_0_sw_init - software-side init for the GFX v8 IP block.
 *
 * Registers the KIQ/EOP/privileged-register/privileged-instruction
 * interrupt sources, loads gfx microcode, allocates the RLC/MEC/KIQ/MQD
 * buffer objects, creates the gfx and compute rings, and allocates the
 * GDS/GWS/OA partitions.  Teardown for everything created here happens
 * in gfx_v8_0_sw_fini().  Returns 0 on success or a negative error code
 * from the first step that fails.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event (src id 178) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP event (src id 181) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged register access fault (src id 184) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged instruction fault (src id 185) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring(s); Topaz has no gfx doorbell */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");

		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - iterating queue (j) before pipe (k)
	 * so consecutive rings land on different pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD backing store for all compute/KIQ queues */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve the GDS memory partition */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* reserve the GWS partition */
	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* reserve the OA partition */
	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2153
/*
 * gfx_v8_0_sw_fini - tear down everything created by gfx_v8_0_sw_init().
 *
 * Frees the GDS/GWS/OA BOs, finalizes the gfx and compute rings, the MQD
 * backing store, the KIQ ring and BOs, the MEC and RLC state (including
 * the clear-state BO and, on Carrizo/Stoney, the CP table BO), and the
 * loaded microcode.  Statement order is the rough reverse of sw_init and
 * must be preserved.  Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* the CP table BO only exists on Carrizo/Stoney (see rlc init) */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2187
2188static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2189{
2190 uint32_t *modearray, *mod2array;
2191 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2192 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2193 u32 reg_offset;
2194
2195 modearray = adev->gfx.config.tile_mode_array;
2196 mod2array = adev->gfx.config.macrotile_mode_array;
2197
2198 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2199 modearray[reg_offset] = 0;
2200
2201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2202 mod2array[reg_offset] = 0;
2203
2204 switch (adev->asic_type) {
2205 case CHIP_TOPAZ:
2206 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207 PIPE_CONFIG(ADDR_SURF_P2) |
2208 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2209 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2210 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211 PIPE_CONFIG(ADDR_SURF_P2) |
2212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2214 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215 PIPE_CONFIG(ADDR_SURF_P2) |
2216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2218 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 PIPE_CONFIG(ADDR_SURF_P2) |
2220 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2221 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2222 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2223 PIPE_CONFIG(ADDR_SURF_P2) |
2224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2226 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227 PIPE_CONFIG(ADDR_SURF_P2) |
2228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2231 PIPE_CONFIG(ADDR_SURF_P2) |
2232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2234 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2235 PIPE_CONFIG(ADDR_SURF_P2));
2236 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2237 PIPE_CONFIG(ADDR_SURF_P2) |
2238 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2240 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 PIPE_CONFIG(ADDR_SURF_P2) |
2242 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2248 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2252 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2256 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2260 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2264 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2272 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2276 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2280 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2281 PIPE_CONFIG(ADDR_SURF_P2) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2284 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2285 PIPE_CONFIG(ADDR_SURF_P2) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2289 PIPE_CONFIG(ADDR_SURF_P2) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2293 PIPE_CONFIG(ADDR_SURF_P2) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2296 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2297 PIPE_CONFIG(ADDR_SURF_P2) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2300 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301 PIPE_CONFIG(ADDR_SURF_P2) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2305 PIPE_CONFIG(ADDR_SURF_P2) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2308
2309 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312 NUM_BANKS(ADDR_SURF_8_BANK));
2313 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2316 NUM_BANKS(ADDR_SURF_8_BANK));
2317 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2320 NUM_BANKS(ADDR_SURF_8_BANK));
2321 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2324 NUM_BANKS(ADDR_SURF_8_BANK));
2325 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2328 NUM_BANKS(ADDR_SURF_8_BANK));
2329 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2332 NUM_BANKS(ADDR_SURF_8_BANK));
2333 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2336 NUM_BANKS(ADDR_SURF_8_BANK));
2337 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2340 NUM_BANKS(ADDR_SURF_16_BANK));
2341 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2344 NUM_BANKS(ADDR_SURF_16_BANK));
2345 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2348 NUM_BANKS(ADDR_SURF_16_BANK));
2349 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2352 NUM_BANKS(ADDR_SURF_16_BANK));
2353 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2356 NUM_BANKS(ADDR_SURF_16_BANK));
2357 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360 NUM_BANKS(ADDR_SURF_16_BANK));
2361 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 NUM_BANKS(ADDR_SURF_8_BANK));
2365
2366 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2367 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2368 reg_offset != 23)
2369 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2370
2371 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2372 if (reg_offset != 7)
2373 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2374
2375 break;
2376 case CHIP_FIJI:
2377 case CHIP_VEGAM:
2378 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2386 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2389 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2390 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2393 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2394 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2397 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2398 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2401 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2402 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2408 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2409 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2410 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2411 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2412 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2448 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2452 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2453 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2456 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2457 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2460 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2461 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2469 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2477 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2484 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500
2501 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504 NUM_BANKS(ADDR_SURF_8_BANK));
2505 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508 NUM_BANKS(ADDR_SURF_8_BANK));
2509 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 NUM_BANKS(ADDR_SURF_8_BANK));
2513 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2516 NUM_BANKS(ADDR_SURF_8_BANK));
2517 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 NUM_BANKS(ADDR_SURF_8_BANK));
2521 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 NUM_BANKS(ADDR_SURF_8_BANK));
2525 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2528 NUM_BANKS(ADDR_SURF_8_BANK));
2529 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532 NUM_BANKS(ADDR_SURF_8_BANK));
2533 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2536 NUM_BANKS(ADDR_SURF_8_BANK));
2537 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2540 NUM_BANKS(ADDR_SURF_8_BANK));
2541 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2544 NUM_BANKS(ADDR_SURF_8_BANK));
2545 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 NUM_BANKS(ADDR_SURF_8_BANK));
2549 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552 NUM_BANKS(ADDR_SURF_8_BANK));
2553 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2556 NUM_BANKS(ADDR_SURF_4_BANK));
2557
2558 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2559 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2560
2561 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2562 if (reg_offset != 7)
2563 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2564
2565 break;
	case CHIP_TONGA:
		/*
		 * Tonga golden tiling tables: 8-pipe configuration
		 * (ADDR_SURF_P8_32x32_16x16) for most modes; the PRT modes at
		 * indices 7, 12, 17, 23 and 30 use ADDR_SURF_P4_16x16 instead.
		 * Indices 0-7 are depth modes with increasing tile split sizes.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro-tile (bank width/height/aspect/bank-count) table.
		 * mod2array[7] is intentionally never assigned here, and the
		 * write loop below skips that register index as well.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the GB_TILE_MODEn golden values. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/*
		 * Program GB_MACROTILE_MODEn, skipping index 7 (mod2array[7]
		 * is never assigned above).
		 * NOTE(review): other unassigned entries presumably rely on
		 * the arrays being zeroed before this switch — confirm in the
		 * enclosing function.
		 */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Polaris11/Polaris12 golden tiling tables: 4-pipe
		 * configuration (ADDR_SURF_P4_16x16) for every mode.
		 * Indices 0-7 are depth modes with increasing tile split sizes.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro-tile (bank width/height/aspect/bank-count) table.
		 * Note indices 8 and 9 use BANK_WIDTH_2, unlike the other
		 * ASICs in this switch. mod2array[7] is intentionally never
		 * assigned; the write loop below skips that register as well.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the GB_TILE_MODEn golden values. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn, skipping the unassigned index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		/*
		 * Polaris10 golden tiling tables: 8-pipe configuration
		 * (ADDR_SURF_P8_32x32_16x16) for most modes; the PRT modes at
		 * indices 7, 12, 17, 23 and 30 use ADDR_SURF_P4_16x16 instead.
		 * Indices 0-7 are depth modes with increasing tile split sizes.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro-tile (bank width/height/aspect/bank-count) table.
		 * mod2array[7] is intentionally never assigned here, and the
		 * write loop below skips that register index as well.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the GB_TILE_MODEn golden values. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn, skipping the unassigned index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
3160 case CHIP_STONEY:
3161 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3162 PIPE_CONFIG(ADDR_SURF_P2) |
3163 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3165 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166 PIPE_CONFIG(ADDR_SURF_P2) |
3167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3169 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 PIPE_CONFIG(ADDR_SURF_P2) |
3175 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178 PIPE_CONFIG(ADDR_SURF_P2) |
3179 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3182 PIPE_CONFIG(ADDR_SURF_P2) |
3183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3186 PIPE_CONFIG(ADDR_SURF_P2) |
3187 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3189 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3190 PIPE_CONFIG(ADDR_SURF_P2));
3191 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3192 PIPE_CONFIG(ADDR_SURF_P2) |
3193 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3195 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196 PIPE_CONFIG(ADDR_SURF_P2) |
3197 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3203 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3204 PIPE_CONFIG(ADDR_SURF_P2) |
3205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208 PIPE_CONFIG(ADDR_SURF_P2) |
3209 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3212 PIPE_CONFIG(ADDR_SURF_P2) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3216 PIPE_CONFIG(ADDR_SURF_P2) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3219 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3223 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3224 PIPE_CONFIG(ADDR_SURF_P2) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3227 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3228 PIPE_CONFIG(ADDR_SURF_P2) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3232 PIPE_CONFIG(ADDR_SURF_P2) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3236 PIPE_CONFIG(ADDR_SURF_P2) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3240 PIPE_CONFIG(ADDR_SURF_P2) |
3241 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3251 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3263
3264 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267 NUM_BANKS(ADDR_SURF_8_BANK));
3268 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271 NUM_BANKS(ADDR_SURF_8_BANK));
3272 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3275 NUM_BANKS(ADDR_SURF_8_BANK));
3276 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3279 NUM_BANKS(ADDR_SURF_8_BANK));
3280 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 NUM_BANKS(ADDR_SURF_8_BANK));
3284 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287 NUM_BANKS(ADDR_SURF_8_BANK));
3288 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3291 NUM_BANKS(ADDR_SURF_8_BANK));
3292 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 NUM_BANKS(ADDR_SURF_16_BANK));
3296 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3299 NUM_BANKS(ADDR_SURF_16_BANK));
3300 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 NUM_BANKS(ADDR_SURF_16_BANK));
3304 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 NUM_BANKS(ADDR_SURF_16_BANK));
3308 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311 NUM_BANKS(ADDR_SURF_16_BANK));
3312 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315 NUM_BANKS(ADDR_SURF_16_BANK));
3316 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319 NUM_BANKS(ADDR_SURF_8_BANK));
3320
3321 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3322 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3323 reg_offset != 23)
3324 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3325
3326 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3327 if (reg_offset != 7)
3328 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3329
3330 break;
3331 default:
3332 dev_warn(adev->dev,
3333 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3334 adev->asic_type);
3335
3336 case CHIP_CARRIZO:
3337 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3338 PIPE_CONFIG(ADDR_SURF_P2) |
3339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3340 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3341 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342 PIPE_CONFIG(ADDR_SURF_P2) |
3343 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3344 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3345 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346 PIPE_CONFIG(ADDR_SURF_P2) |
3347 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3348 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3349 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3350 PIPE_CONFIG(ADDR_SURF_P2) |
3351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3352 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3353 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354 PIPE_CONFIG(ADDR_SURF_P2) |
3355 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3356 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3357 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3358 PIPE_CONFIG(ADDR_SURF_P2) |
3359 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3360 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3361 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362 PIPE_CONFIG(ADDR_SURF_P2) |
3363 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3364 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3365 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3366 PIPE_CONFIG(ADDR_SURF_P2));
3367 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3368 PIPE_CONFIG(ADDR_SURF_P2) |
3369 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3371 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3372 PIPE_CONFIG(ADDR_SURF_P2) |
3373 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3375 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3376 PIPE_CONFIG(ADDR_SURF_P2) |
3377 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3379 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3380 PIPE_CONFIG(ADDR_SURF_P2) |
3381 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384 PIPE_CONFIG(ADDR_SURF_P2) |
3385 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3387 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3388 PIPE_CONFIG(ADDR_SURF_P2) |
3389 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3391 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3392 PIPE_CONFIG(ADDR_SURF_P2) |
3393 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3395 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3396 PIPE_CONFIG(ADDR_SURF_P2) |
3397 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3399 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3400 PIPE_CONFIG(ADDR_SURF_P2) |
3401 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3403 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3404 PIPE_CONFIG(ADDR_SURF_P2) |
3405 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3407 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3408 PIPE_CONFIG(ADDR_SURF_P2) |
3409 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3411 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3412 PIPE_CONFIG(ADDR_SURF_P2) |
3413 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3415 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3416 PIPE_CONFIG(ADDR_SURF_P2) |
3417 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3419 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3420 PIPE_CONFIG(ADDR_SURF_P2) |
3421 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3423 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3424 PIPE_CONFIG(ADDR_SURF_P2) |
3425 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3427 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3428 PIPE_CONFIG(ADDR_SURF_P2) |
3429 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3431 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3432 PIPE_CONFIG(ADDR_SURF_P2) |
3433 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3436 PIPE_CONFIG(ADDR_SURF_P2) |
3437 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3439
3440 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443 NUM_BANKS(ADDR_SURF_8_BANK));
3444 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3447 NUM_BANKS(ADDR_SURF_8_BANK));
3448 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3451 NUM_BANKS(ADDR_SURF_8_BANK));
3452 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3455 NUM_BANKS(ADDR_SURF_8_BANK));
3456 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459 NUM_BANKS(ADDR_SURF_8_BANK));
3460 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3463 NUM_BANKS(ADDR_SURF_8_BANK));
3464 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3467 NUM_BANKS(ADDR_SURF_8_BANK));
3468 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3471 NUM_BANKS(ADDR_SURF_16_BANK));
3472 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3475 NUM_BANKS(ADDR_SURF_16_BANK));
3476 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479 NUM_BANKS(ADDR_SURF_16_BANK));
3480 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483 NUM_BANKS(ADDR_SURF_16_BANK));
3484 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3487 NUM_BANKS(ADDR_SURF_16_BANK));
3488 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3491 NUM_BANKS(ADDR_SURF_16_BANK));
3492 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495 NUM_BANKS(ADDR_SURF_8_BANK));
3496
3497 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3498 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3499 reg_offset != 23)
3500 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3501
3502 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3503 if (reg_offset != 7)
3504 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3505
3506 break;
3507 }
3508}
3509
3510static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3511 u32 se_num, u32 sh_num, u32 instance)
3512{
3513 u32 data;
3514
3515 if (instance == 0xffffffff)
3516 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3517 else
3518 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3519
3520 if (se_num == 0xffffffff)
3521 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3522 else
3523 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3524
3525 if (sh_num == 0xffffffff)
3526 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3527 else
3528 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3529
3530 WREG32(mmGRBM_GFX_INDEX, data);
3531}
3532
/*
 * gfx_v8_0_select_me_pipe_q - steer SRBM accesses to one CP queue
 *
 * Thin wrapper around vi_srbm_select() selecting microengine @me, @pipe and
 * queue @q, always with VMID 0.  Locking (adev->srbm_mutex) is the caller's
 * responsibility, same as for vi_srbm_select() itself.
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3538
3539static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3540{
3541 u32 data, mask;
3542
3543 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3544 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3545
3546 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3547
3548 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3549 adev->gfx.config.max_sh_per_se);
3550
3551 return (~data) & mask;
3552}
3553
/*
 * gfx_v8_0_raster_config - golden raster config values per ASIC
 *
 * ORs the recommended PA_SC_RASTER_CONFIG (*rconf) and
 * PA_SC_RASTER_CONFIG_1 (*rconf1) field values for a fully enabled RB
 * layout into the caller-provided words.  These are hardware-specific
 * constants; single-SE parts (Topaz/Carrizo/Stoney) need little or no
 * SE/PKR mapping, multi-SE parts also program the SE-pair fields.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3594
/*
 * gfx_v8_0_write_harvested_raster_configs - adjust raster config for
 * harvested (partially disabled) render backends
 *
 * @raster_config/@raster_config_1: golden values from gfx_v8_0_raster_config()
 * @rb_mask: bitmap of RBs that are actually enabled
 * @num_rb: number of RB slots the mask covers
 *
 * When some RBs are fused off, the default SE/PKR/RB mapping would route
 * work to dead backends.  For each SE this rewrites the SE_MAP, PKR_MAP
 * and RB_MAP_PKR0/1 fields so that mapping always lands on an enabled RB,
 * then writes the per-SE value with GRBM steered to that SE.  Finally the
 * GRBM index is restored to broadcast.
 *
 * Caller must hold adev->grbm_idx_mutex (this function reprograms
 * GRBM_GFX_INDEX via gfx_v8_0_select_se_sh()).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB mask into one contiguous sub-mask per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair is completely dead, point SE_PAIR_MAP at the
	 * surviving pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of this pair is dead, remap SE_MAP to the live
		 * one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: pick the live packer. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* And finally pick a live RB within each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* Restore broadcast so later writes hit every SE/SH again. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3703
/*
 * gfx_v8_0_setup_rb - discover active render backends and program
 * PA_SC_RASTER_CONFIG accordingly
 *
 * Walks every SE/SH to collect the active-RB bitmap, stores it (and the
 * popcount) in adev->gfx.config, then either writes the golden raster
 * config directly (no harvesting) or delegates to
 * gfx_v8_0_write_harvested_raster_configs().  Finally it reads back and
 * caches the per-SE/SH RB/raster registers into
 * adev->gfx.config.rb_config[][] for later consumers.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Gather the active-RB bitmap, one bit group per SE/SH slot. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Full RB complement (or nothing readable at all): broadcast the
	 * golden values; otherwise remap around the harvested RBs. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* Cache the per-SE/SH register values so they can be reported
	 * without touching the hardware again. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3760
3761
3762
3763
3764
3765
3766
3767
3768
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
/*
 * gfx_v8_0_init_compute_vmid - program SH_MEM state for the compute VMIDs
 *
 * VMIDs FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1 (8..15) are set up with
 * HSA64 addressing, unaligned access support, cache-coherent default MTYPE
 * and private ATC, plus SH_MEM_BASES carrying DEFAULT_SH_MEM_BASES in both
 * the low and high halves (private and shared aperture bases — presumably
 * the KFD/HSA aperture layout; confirm against the KFD topology code).
 * APE1 is effectively disabled (base 1, limit 0).
 */
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Same base in both halves of the register: low 16 bits and the
	 * copy shifted into the upper 16 bits.
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3805
3806static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3807{
3808 switch (adev->asic_type) {
3809 default:
3810 adev->gfx.config.double_offchip_lds_buf = 1;
3811 break;
3812 case CHIP_CARRIZO:
3813 case CHIP_STONEY:
3814 adev->gfx.config.double_offchip_lds_buf = 0;
3815 break;
3816 }
3817}
3818
/*
 * gfx_v8_0_gpu_init - one-time gfx block initialization
 *
 * Programs the address-config registers, tiling tables, RB setup and CU
 * info, then per-VMID SH_MEM state (VMID 0 uncached for the kernel, the
 * rest NC with the shared aperture base), compute-VMID apertures, the
 * PA_SC FIFO sizes and SPI arbitration priorities.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* Static (scratch) memory swizzling: enable, element size 1,
	 * index stride 3. */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default/APE1 MTYPE, bases at 0. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* Other VMIDs: non-coherent default, bases from the
			 * shared aperture (top 16 bits of the 64-bit start). */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}
		/* APE1 disabled (base > limit). */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3900
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY per SE/SH (up to adev->usec_timeout
 * microseconds each), then the non-CU masters (SE/GC/TC0/TC1).  On a
 * per-SE/SH timeout it logs, restores GRBM broadcast and returns early;
 * the final non-CU poll times out silently.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast and drop the lock before
				 * bailing out. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3938
3939static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3940 bool enable)
3941{
3942 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3943
3944 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3945 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3946 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3947 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3948
3949 WREG32(mmCP_INT_CNTL_RING0, tmp);
3950}
3951
/*
 * gfx_v8_0_init_csb - point the RLC at the clear-state indirect buffer
 *
 * Writes the 64-bit GPU address (low word aligned down to 4 bytes) and
 * size of the preallocated clear-state buffer into the RLC CSIB registers.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3962
3963static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3964 int ind_offset,
3965 int list_size,
3966 int *unique_indices,
3967 int *indices_count,
3968 int max_indices,
3969 int *ind_start_offsets,
3970 int *offset_count,
3971 int max_offset)
3972{
3973 int indices;
3974 bool new_entry = true;
3975
3976 for (; ind_offset < list_size; ind_offset++) {
3977
3978 if (new_entry) {
3979 new_entry = false;
3980 ind_start_offsets[*offset_count] = ind_offset;
3981 *offset_count = *offset_count + 1;
3982 BUG_ON(*offset_count >= max_offset);
3983 }
3984
3985 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3986 new_entry = true;
3987 continue;
3988 }
3989
3990 ind_offset += 2;
3991
3992
3993 for (indices = 0;
3994 indices < *indices_count;
3995 indices++) {
3996 if (unique_indices[indices] ==
3997 register_list_format[ind_offset])
3998 break;
3999 }
4000
4001 if (indices >= *indices_count) {
4002 unique_indices[*indices_count] =
4003 register_list_format[ind_offset];
4004 indices = *indices_count;
4005 *indices_count = *indices_count + 1;
4006 BUG_ON(*indices_count >= max_indices);
4007 }
4008
4009 register_list_format[ind_offset] = indices;
4010 }
4011}
4012
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC register save/restore
 * lists into RLC memory
 *
 * Copies the firmware-provided register list format, parses out its unique
 * indirect-register indices and per-entry start offsets, then uploads:
 * the restore values into SRM ARAM, the (rewritten) format list, the list
 * size and the entry start offsets into RLC GPM scratch, and finally the
 * unique index/data pairs into the SRM index-control registers.
 *
 * Return: 0 on success, -ENOMEM if the temporary list copy cannot be
 * allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list: auto-increment the ARAM address on writes */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list: upload the rewritten format into GPM scratch */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size in register/value pairs (hence the halving) */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets of each indirect-list entry */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
			adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: low 18 bits -> ADDR regs, high bits -> DATA regs */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4076
/* Enable the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4081
/*
 * gfx_v8_0_init_power_gating - program power-gating timing parameters
 *
 * Sets the RB WPTR idle-poll count, the four RLC power-up/down/command/
 * memory-sleep delays, the serdes command delay and the GRBM register-save
 * idle threshold.  All values are fixed tuning constants for gfx8.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4098
/* Toggle SMU clock slow-down during power-up sequences (Carrizo family). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4104
/* Toggle SMU clock slow-down during power-down sequences (Carrizo family). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4110
/* Enable/disable CP power gating; note the field is a DISABLE bit, so the
 * value is inverted relative to @enable. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4115
4116static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4117{
4118 if ((adev->asic_type == CHIP_CARRIZO) ||
4119 (adev->asic_type == CHIP_STONEY)) {
4120 gfx_v8_0_init_csb(adev);
4121 gfx_v8_0_init_save_restore_list(adev);
4122 gfx_v8_0_enable_save_restore_machine(adev);
4123 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4124 gfx_v8_0_init_power_gating(adev);
4125 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4126 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4127 (adev->asic_type == CHIP_POLARIS12) ||
4128 (adev->asic_type == CHIP_VEGAM)) {
4129 gfx_v8_0_init_csb(adev);
4130 gfx_v8_0_init_save_restore_list(adev);
4131 gfx_v8_0_enable_save_restore_machine(adev);
4132 gfx_v8_0_init_power_gating(adev);
4133 }
4134
4135}
4136
/* Halt the RLC F32 core, mask the GUI idle interrupt and wait for the
 * RLC serdes to drain so it is safe to reprogram the RLC.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4144
/* Pulse the RLC soft reset with a 50us settle delay on each edge. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4153
/* Enable the RLC F32 core and give it 50us to come up. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);


	/* On APUs the GUI idle interrupt is not re-enabled here —
	 * presumably deferred until after CP init (see gfx_v8_0_cp_resume).
	 */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4164
4165static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4166{
4167 const struct rlc_firmware_header_v2_0 *hdr;
4168 const __le32 *fw_data;
4169 unsigned i, fw_size;
4170
4171 if (!adev->gfx.rlc_fw)
4172 return -EINVAL;
4173
4174 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4175 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4176
4177 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4178 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4179 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4180
4181 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4182 for (i = 0; i < fw_size; i++)
4183 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4184 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4185
4186 return 0;
4187}
4188
/* Full RLC bring-up sequence: stop the RLC, disable CGCG/CGLS clock
 * gating and power gating, soft-reset the RLC, re-init PG state,
 * load the RLC microcode when using direct firmware loading, then
 * start the RLC.  The ordering of these steps matters.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable coarse/fine grain clock gating before touching the RLC */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		/* also clear the low two (3D CGCG/CGLS enable) bits */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable power gating */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	/* with PSP/SMU firmware loading the RLC ucode is loaded elsewhere */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy (driver-driven) rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4228
4229static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4230{
4231 int i;
4232 u32 tmp = RREG32(mmCP_ME_CNTL);
4233
4234 if (enable) {
4235 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4236 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4237 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4238 } else {
4239 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4240 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4241 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4242 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4243 adev->gfx.gfx_ring[i].ready = false;
4244 }
4245 WREG32(mmCP_ME_CNTL, tmp);
4246 udelay(50);
4247}
4248
4249static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4250{
4251 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4252 const struct gfx_firmware_header_v1_0 *ce_hdr;
4253 const struct gfx_firmware_header_v1_0 *me_hdr;
4254 const __le32 *fw_data;
4255 unsigned i, fw_size;
4256
4257 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4258 return -EINVAL;
4259
4260 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4261 adev->gfx.pfp_fw->data;
4262 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4263 adev->gfx.ce_fw->data;
4264 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4265 adev->gfx.me_fw->data;
4266
4267 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4268 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4269 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4270
4271 gfx_v8_0_cp_gfx_enable(adev, false);
4272
4273
4274 fw_data = (const __le32 *)
4275 (adev->gfx.pfp_fw->data +
4276 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4277 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4278 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4279 for (i = 0; i < fw_size; i++)
4280 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4281 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4282
4283
4284 fw_data = (const __le32 *)
4285 (adev->gfx.ce_fw->data +
4286 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4287 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4288 WREG32(mmCP_CE_UCODE_ADDR, 0);
4289 for (i = 0; i < fw_size; i++)
4290 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4291 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4292
4293
4294 fw_data = (const __le32 *)
4295 (adev->gfx.me_fw->data +
4296 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4297 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4298 WREG32(mmCP_ME_RAM_WADDR, 0);
4299 for (i = 0; i < fw_size; i++)
4300 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4301 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4302
4303 return 0;
4304}
4305
4306static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4307{
4308 u32 count = 0;
4309 const struct cs_section_def *sect = NULL;
4310 const struct cs_extent_def *ext = NULL;
4311
4312
4313 count += 2;
4314
4315 count += 3;
4316
4317 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4318 for (ext = sect->section; ext->extent != NULL; ++ext) {
4319 if (sect->id == SECT_CONTEXT)
4320 count += 2 + ext->reg_count;
4321 else
4322 return 0;
4323 }
4324 }
4325
4326 count += 4;
4327
4328 count += 2;
4329
4330 count += 2;
4331
4332 return count;
4333}
4334
/* Initialize the gfx CP by submitting the clear-state packet stream on
 * gfx ring 0.  Packet order must match gfx_v8_0_get_csb_size().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* csb stream + 4 dwords of headroom */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state preamble */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit the golden context register values */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config for render backend harvesting */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partition base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the gfx CP ring-buffer doorbell: enable/disable it per the
 * ring, and on dGPUs program the doorbell aperture range.  Topaz has
 * no doorbell support and is skipped entirely.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the doorbell range registers only exist on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4431
/* Program gfx ring 0's ring-buffer registers (size, rptr/wptr
 * addresses, base address, doorbell), start the CP via
 * gfx_v8_0_cp_gfx_start() and run a ring test.  Returns the ring
 * test result.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size (log2 of the size in dwords) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA lets us force the rptr while we reset wptr. */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address where the CP writes back the rptr */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is stored in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4489
4490static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4491{
4492 int i;
4493
4494 if (enable) {
4495 WREG32(mmCP_MEC_CNTL, 0);
4496 } else {
4497 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4498 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4499 adev->gfx.compute_ring[i].ready = false;
4500 adev->gfx.kiq.ring.ready = false;
4501 }
4502 udelay(50);
4503}
4504
/* Load the MEC (and optional MEC2) microcode with the compute CP
 * halted.  Returns -EINVAL when the primary MEC firmware is missing;
 * MEC2 firmware is optional.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1: stream ucode, then leave the fw version in the ADDR reg */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* MEC2: only present on some ASICs */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4550
4551
/* Tell the RLC which me/pipe/queue is the KIQ by encoding them into
 * the low byte of RLC_CP_SCHEDULERS, then set bit 0x80 (presumably the
 * "KIQ active" flag — matches the two-step write sequence used by
 * other GFX generations; confirm against RLC docs).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4565
/* Map all compute queues (KCQs) through the KIQ: submit a
 * SET_RESOURCES packet with the queue mask followed by one MAP_QUEUES
 * packet per compute ring, then verify completion by writing a magic
 * value to a scratch register from the KIQ and polling for it.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the mask of all queues owned by the KFD/KGD MEC */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues.  If so, the
		 * definition of queue_mask needs updating. */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 8 for SET_RESOURCES + 3 for the check */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KGD) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1));
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write the completion marker to the scratch register */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4651
/* Deactivate the currently selected HQD (caller must have selected it
 * via vi_srbm_select).  Issues a dequeue request of type @req if the
 * queue is active and waits for it to go idle, then clears the
 * dequeue-request, rptr and wptr registers.  Returns -ETIMEDOUT if the
 * queue stays active past adev->usec_timeout.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4672
/* Populate the MQD (memory queue descriptor) for a compute/KIQ ring.
 * Caller must have selected the target queue via vi_srbm_select(),
 * since several fields are seeded from the current HQD registers.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* GPU address of the dynamic CU mask stored in the MQD allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* eop base is stored in 256-byte units */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set MQD vmid to 0 */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the MQD */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address where the CP writes back the rptr */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.EN=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining HQD registers as-is */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4815
/* Write a populated MQD into the HQD registers of the currently
 * selected queue.  Relies on the MQD fields from cp_mqd_base_addr_lo
 * onward mirroring the mmCP_MQD_BASE_ADDR.. register layout, so each
 * register can be indexed as mqd_data[reg - mmCP_MQD_BASE_ADDR].
 * mmCP_HQD_ACTIVE is written last to activate the queue.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4852
/* Initialize (or, after a GPU reset, restore) the KIQ's MQD and commit
 * it to the hardware.  On first init the MQD is built from scratch and
 * backed up; on reset the backup is restored and recommitted.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;	/* KIQ backup slot */

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) {
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4891
/* Initialize a compute ring's MQD.  Unlike the KIQ the MQD is only
 * built here, not committed — the KIQ maps KCQs via MAP_QUEUES (see
 * gfx_v8_0_kiq_kcq_enable).  On GPU reset the backup is restored; on
 * plain resume the ring is merely cleared.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) {
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4922
/* Program the MEC doorbell aperture (ASICs newer than Tonga only) and
 * globally enable CP doorbells.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4932
/* Bring up the KIQ and all compute rings: enable the compute CP,
 * init the KIQ queue and every KCQ MQD, program the MEC doorbell
 * range, map the KCQs through the KIQ, then ring-test everything.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
5000
/* Resume the whole CP: optionally (direct FW load only) upload the
 * gfx and compute microcode, then bring up the gfx ring and the
 * KIQ/compute rings, and finally re-enable the GUI idle interrupt.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy (driver-driven) firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
5031
/* Enable or disable both the gfx and compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5037
/* gfx IP hw_init callback: program the golden registers, initialize
 * the gfx core, then bring up the RLC followed by the CP.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5054
/* Unmap one compute queue via the KIQ: submit an UNMAP_QUEUES packet
 * followed by a scratch-register write, then poll the scratch register
 * to confirm the KIQ processed the request.  Returns -EINVAL on
 * timeout.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write the completion marker */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
5105
/* gfx IP hw_fini callback: mask privileged-op interrupts, unmap all
 * compute queues, then (bare-metal only) stop the CP and RLC and
 * ungate GFX power gating.  Under SR-IOV the host owns the hardware,
 * so nothing beyond the queue unmap is done.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_UNGATE);

	return 0;
}
5131
/* gfx IP suspend callback: flag suspend (so resume paths can tell a
 * suspend/resume cycle from a fresh init) and tear down the hardware.
 */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}
5138
/* gfx IP resume callback: re-run hw_init and clear the suspend flag. */
static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
5148
5149static bool gfx_v8_0_is_idle(void *handle)
5150{
5151 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5152
5153 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5154 return false;
5155 else
5156 return true;
5157}
5158
5159static int gfx_v8_0_wait_for_idle(void *handle)
5160{
5161 unsigned i;
5162 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5163
5164 for (i = 0; i < adev->usec_timeout; i++) {
5165 if (gfx_v8_0_is_idle(handle))
5166 return 0;
5167
5168 udelay(1);
5169 }
5170 return -ETIMEDOUT;
5171}
5172
/* check_soft_reset() IP callback: inspect the GRBM/SRBM status registers and
 * compute which soft-reset bits would be needed to recover the GFX block.
 * The masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset callbacks; returns true when a reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy graphics-pipeline unit => reset CP + GFX + GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC and the three CP micro-engines (CPF/CPC/CPG) */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests and the semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* cache the masks for the later reset phases */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5234
/* pre_soft_reset() IP callback: quiesce everything the pending soft reset
 * will touch, based on the masks cached by gfx_v8_0_check_soft_reset().
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing flagged => nothing to quiesce */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the RLC before touching the CP */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* disable gfx CP parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate each compute queue's HQD under the SRBM lock
		 * before halting the compute micro-engines */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}

		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
5276
/* soft_reset() IP callback: pulse the GRBM/SRBM soft-reset bits computed by
 * check_soft_reset().  The GMCON GFX_STALL/GFX_CLEAR writes bracketing the
 * reset stall GFX traffic in the memory controller while the block resets.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX traffic and clear state in GMCON before resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the reset bits, hold ~50us, then deassert;
		 * each write is read back to post it */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/hold/deassert dance for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* let the hardware settle */
	udelay(50);

	return 0;
}
5338
/* post_soft_reset() IP callback: bring the engines touched by the reset
 * back up — gfx CP, compute HQDs, KIQ — and restart the RLC last.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* NOTE(review): the HQDs are deactivated again here before
		 * gfx_v8_0_kiq_resume() re-maps the queues — presumably to
		 * guarantee a clean slate after the reset; confirm against
		 * the kiq_resume path. */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	/* restart the RLC only after the CP engines are back */
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5376
5377
5378
5379
5380
5381
5382
5383
5384
/* Read the 64-bit GPU clock counter.  Writing the capture register latches
 * the counter so the subsequent LSB/MSB reads are coherent; the mutex
 * serializes concurrent captures.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5396
/* Emit PM4 WRITE_DATA packets that program the per-VMID GDS base/size, GWS
 * and OA allocations for a context switch.  Raw byte values are first
 * converted to the hardware allocation granularity via the *_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base share one register (size field above base) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bit-mask covering [oa_base, oa_base + oa_size) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5444
5445static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5446{
5447 WREG32(mmSQ_IND_INDEX,
5448 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5449 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5450 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5451 (SQ_IND_INDEX__FORCE_READ_MASK));
5452 return RREG32(mmSQ_IND_DATA);
5453}
5454
5455static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5456 uint32_t wave, uint32_t thread,
5457 uint32_t regno, uint32_t num, uint32_t *out)
5458{
5459 WREG32(mmSQ_IND_INDEX,
5460 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5461 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5462 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5463 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5464 (SQ_IND_INDEX__FORCE_READ_MASK) |
5465 (SQ_IND_INDEX__AUTO_INCR_MASK));
5466 while (num--)
5467 *(out++) = RREG32(mmSQ_IND_DATA);
5468}
5469
/* Snapshot the architectural state of one wave (for wave debugging/dumps).
 * dst[0] is written as 0 — presumably a version/reserved slot; confirm
 * against the consumer of this dump.  The remaining entries are SQ wave
 * registers read through the SQ_IND index/data pair; *no_fields is advanced
 * past the last entry written.  The output order is part of the dump format
 * and must not change.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* leading placeholder field */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5493
5494static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5495 uint32_t wave, uint32_t start,
5496 uint32_t size, uint32_t *dst)
5497{
5498 wave_read_regs(
5499 adev, simd, wave, 0,
5500 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5501}
5502
5503
/* GFX v8 hooks plugged into adev->gfx.funcs by gfx_v8_0_early_init(). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5511
/* early_init() IP callback: set the ring counts and install the gfx, ring,
 * irq, GDS and RLC function tables before any hardware access happens.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5526
/* late_init() IP callback: enable the privileged-access interrupts, run the
 * EDC GPR workaround (needs IBs, hence late init), then allow GFX power
 * gating.  Returns a negative errno on the first failing step.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* submits IBs, so it must run after the IB infrastructure is up */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_GATE);

	return 0;
}
5551
/* Enable/disable static per-CU power gating.  On Polaris11/12/VEGAM the SMU
 * must be told first through powerplay; note that the brace-less `if` below
 * intentionally guards only the SMC call — the RLC_PG_CNTL write executes
 * on every ASIC.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12) ||
	    (adev->asic_type == CHIP_VEGAM))
		/* send the request to the SMU via powerplay */
		amdgpu_device_ip_set_powergating_state(adev,
						       AMD_IP_BLOCK_TYPE_SMC,
						       enable ?
						       AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5566
5567static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5568 bool enable)
5569{
5570 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5571}
5572
5573static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5574 bool enable)
5575{
5576 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5577}
5578
5579static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5580 bool enable)
5581{
5582 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5583}
5584
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* on disable, read back any GFX register to wake up GFX */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5594
5595static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5596 bool enable)
5597{
5598 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5599 cz_enable_gfx_cg_power_gating(adev, true);
5600 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5601 cz_enable_gfx_pipeline_power_gating(adev, true);
5602 } else {
5603 cz_enable_gfx_cg_power_gating(adev, false);
5604 cz_enable_gfx_pipeline_power_gating(adev, false);
5605 }
5606}
5607
5608static int gfx_v8_0_set_powergating_state(void *handle,
5609 enum amd_powergating_state state)
5610{
5611 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5612 bool enable = (state == AMD_PG_STATE_GATE);
5613
5614 if (amdgpu_sriov_vf(adev))
5615 return 0;
5616
5617 switch (adev->asic_type) {
5618 case CHIP_CARRIZO:
5619 case CHIP_STONEY:
5620
5621 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5622 cz_enable_sck_slow_down_on_power_up(adev, true);
5623 cz_enable_sck_slow_down_on_power_down(adev, true);
5624 } else {
5625 cz_enable_sck_slow_down_on_power_up(adev, false);
5626 cz_enable_sck_slow_down_on_power_down(adev, false);
5627 }
5628 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5629 cz_enable_cp_power_gating(adev, true);
5630 else
5631 cz_enable_cp_power_gating(adev, false);
5632
5633 cz_update_gfx_cg_power_gating(adev, enable);
5634
5635 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5636 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5637 else
5638 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5639
5640 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5641 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5642 else
5643 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5644 break;
5645 case CHIP_POLARIS11:
5646 case CHIP_POLARIS12:
5647 case CHIP_VEGAM:
5648 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5649 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5650 else
5651 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5652
5653 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5654 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5655 else
5656 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5657
5658 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5659 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5660 else
5661 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5662 break;
5663 default:
5664 break;
5665 }
5666
5667 return 0;
5668}
5669
/* get_clockgating_state() IP callback: decode the live register state into
 * AMD_CG_SUPPORT_* feature bits OR-ed into *flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* report nothing under SR-IOV.
	 * NOTE(review): execution still falls through and reads the
	 * registers below — confirm whether an early return was intended. */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5711
/* Broadcast a BPM command over the RLC serdes links of all SEs/SHs/CUs.
 * Used by the clock-gating code to set/clear per-CU override bits
 * (reg_addr selects the BPM register, cmd the set/clear operation).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/* address every CU and non-CU serdes master */
	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		/* Stoney: keep BPM_DATA/REG_ADDR, clear only command/select bits */
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* fill in the command, target register and broadcast BPM address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5752
5753#define MSG_ENTER_RLC_SAFE_MODE 1
5754#define MSG_EXIT_RLC_SAFE_MODE 0
5755#define RLC_GPR_REG2__REQ_MASK 0x00000001
5756#define RLC_GPR_REG2__REQ__SHIFT 0
5757#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5758#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5759
/* Ask the RLC firmware to enter "safe mode" so clock/power gating can be
 * reprogrammed without racing the RLC.  No-op when the RLC is halted or
 * when neither CGCG nor MGCG is enabled in cg_flags.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do if the RLC's F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* issue the request: CMD=1, MESSAGE=1 (enter safe mode) */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to report fully on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge (CMD bit clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5793
/* Counterpart of iceland_enter_rlc_safe_mode(): request safe-mode exit
 * (CMD=1, MESSAGE=0) and wait for the RLC to acknowledge.  No-op when the
 * RLC is halted.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do if the RLC's F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1 with MESSAGE=0 requests safe-mode exit */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to acknowledge (CMD bit clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5818
/* RLC safe-mode hooks installed into adev->gfx.rlc.funcs. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5823
/* Enable/disable medium-grain clock gating (MGCG) and the associated memory
 * light-sleep features.  Runs inside RLC safe mode because it reprograms
 * the RLC serdes (BPM) override bits; the numbered steps mirror the
 * hardware programming sequence and must stay in order.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* MGCG is disabled by hardware default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - clear MGCG override bits (GRBM override stays on APUs) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for the RLC serdes CU/non-CU masters to go idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear the per-CU MGCG override via serdes */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - enable CGTS (tree shade) MGCG/MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for the RLC serdes masters to go idle again */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - set all MGCG override bits (=1 disables MGCG) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in the RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in the CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - disable CGTS (tree shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for the RLC serdes CU/non-CU masters to go idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set the per-CU MGCG override via serdes */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for the RLC serdes masters to go idle again */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5927
/* Enable/disable coarse-grain clock gating (CGCG) and, when supported,
 * coarse-grain light sleep (CGLS).  Runs inside RLC safe mode; the serdes
 * command/wait ordering must not change.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for the RLC serdes CU/non-CU masters to go idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* clear the per-CU CGCG override via serdes */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for the serdes masters to go idle again */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* enable per-CU CGLS via serdes */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* turn CGCG on */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* turn CGLS on and clear its override */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* enable the interrupts CGCG relies on (it is driven by the
		 * GUI-idle signal) */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable the CGCG-related interrupts first */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* set the CGCG and CGLS overrides */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up CGCG-gated clocks */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for the RLC serdes CU/non-CU masters to go idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* set the per-CU CGCG override via serdes */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for the serdes masters to go idle again */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* disable per-CU CGLS via serdes */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* turn CGCG and CGLS off */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6020static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6021 bool enable)
6022{
6023 if (enable) {
6024
6025
6026
6027 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6028 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6029 } else {
6030
6031
6032
6033 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6034 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6035 }
6036 return 0;
6037}
6038
/* Tonga clock gating is driven by the SMU: build PP_CG_MSG_ID requests for
 * the coarse-grain (CG) and medium-grain (MG) GFX blocks and hand them to
 * powerplay.  NOTE(review): in the MG block, pp_support_state is only
 * re-assigned when MGLS is set, so a value left over from the CG block can
 * leak into the MG message when cg_flags has MGCG but not MGLS — confirm
 * whether that is intended.
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		/* ungating means requesting state 0 for the block */
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
6089
/* Polaris clock gating via the SMU: issue PP_CG_MSG_ID requests for the CG,
 * 3D, MG, RLC and CP GFX sub-blocks.  NOTE(review): as in the Tonga
 * variant, pp_support_state is only re-assigned inside each block when the
 * LS flag is set, so a value from an earlier block can carry over — confirm
 * whether that is intended.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{

	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* RLC light sleep only supports an LS state */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* CP light sleep only supports an LS state */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
6192
/*
 * Top-level clockgating entry point for the GFX IP block.
 *
 * Dispatches to the per-family clockgating routine based on ASIC type.
 * Under SR-IOV virtualization clockgating is not touched at all (the
 * host owns it), so the function returns early.  Always returns 0.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		/* these parts take a simple gate/ungate bool */
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		/* other ASICs: nothing to do */
		break;
	}
	return 0;
}
6222
/* Read the ring's read pointer from its writeback slot in system memory. */
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
6227
6228static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6229{
6230 struct amdgpu_device *adev = ring->adev;
6231
6232 if (ring->use_doorbell)
6233
6234 return ring->adev->wb.wb[ring->wptr_offs];
6235 else
6236 return RREG32(mmCP_RB0_WPTR);
6237}
6238
/*
 * Publish a new GFX ring write pointer to the hardware.
 *
 * Doorbell path: update the writeback copy first, then ring the doorbell.
 * MMIO path: write CP_RB0_WPTR, then read it back to flush the write
 * (ensures the wptr has landed before we return).
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		/* read back to confirm the write reached the register */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
6252
/*
 * Emit a PM4 WAIT_REG_MEM packet that requests an HDP flush and waits for
 * its completion bit in GPU_HDP_FLUSH_DONE.
 *
 * The done bit depends on which CP micro engine/pipe owns the ring:
 * compute/KIQ rings on ME1/ME2 use the CP2../CP6.. bits shifted by pipe,
 * the GFX ring uses CP0 and must wait on the PFP engine instead of ME.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no done bit for other MEs — nothing to emit */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	/* WAIT_REG_MEM: write REQ register, poll DONE register for mask */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6285
/*
 * Emit a VGT flush: a VS partial flush event followed by a VGT_FLUSH
 * event, used before (re)loading context state.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6296
/*
 * Emit an INDIRECT_BUFFER packet on the GFX ring.
 *
 * CE IBs use INDIRECT_BUFFER_CONST, DE IBs use INDIRECT_BUFFER.  Under
 * SR-IOV a preemptible DE IB additionally sets PRE_ENB and writes the
 * DE metadata needed for mid-IB preemption.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) | /* byte-swap IB fetches on BE hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6326
/*
 * Emit an INDIRECT_BUFFER packet on a compute ring.  Simpler than the
 * GFX variant: no CE path and no preemption metadata.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) | /* byte-swap IB fetches on BE hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6342
/*
 * Emit a fence on the GFX ring via EVENT_WRITE_EOP.
 *
 * Flushes/invalidates TC and TCL1 caches at end-of-pipe, then writes the
 * 32- or 64-bit sequence number to @addr and optionally raises an
 * interrupt, per @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6363
/*
 * Emit a pipeline sync: WAIT_REG_MEM polling the ring's fence memory
 * until the last synced sequence number is reached.  The GFX ring waits
 * on the PFP engine so command fetch stalls too; compute waits on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* mask: compare full dword */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6380
/*
 * Emit a VM (GPU TLB) flush for @vmid pointing at page directory
 * @pd_addr, then wait for the invalidate request register to clear.
 * On the GFX ring a PFP_SYNC_ME is appended so the prefetcher does not
 * run ahead of the flush.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0)));   /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6406
/* Compute rings always use doorbells: wptr lives in the writeback slot. */
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
6411
/* Publish a compute ring wptr: writeback slot first, then doorbell. */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6420
/*
 * Throttle or restore a pipe's share of wave launch budget via
 * SPI_WCL_PIPE_PERCENT_*.  @acquire true gives the pipe the full budget;
 * false drops it to the minimum (0x1) so a high-priority pipe can take
 * the resources.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
6439
/*
 * Track pipes reserved for high-priority work and reprogram wave budgets.
 *
 * Marks/unmarks @ring's pipe in pipe_reserve_bitmap under
 * pipe_reserve_mutex.  If no pipe is reserved afterwards, every gfx and
 * compute ring gets its full budget back; otherwise each ring is
 * throttled unless its own pipe is one of the reserved ones.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone can use the full budget */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Throttle every ring whose pipe is not reserved */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6489
/*
 * Program the HQD pipe/queue priority registers for @ring.
 * Must select the ring's me/pipe/queue via SRBM first, under srbm_mutex,
 * and restore the default selection afterwards.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
6506static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6507 enum drm_sched_priority priority)
6508{
6509 struct amdgpu_device *adev = ring->adev;
6510 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6511
6512 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6513 return;
6514
6515 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6516 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6517}
6518
/*
 * Emit a fence on a compute ring via RELEASE_MEM (the compute-side
 * equivalent of EVENT_WRITE_EOP): flush TC/TCL1 caches, write the 32- or
 * 64-bit sequence number to @addr, optionally raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6539
/*
 * Emit a fence on the KIQ ring with plain WRITE_DATA packets: write the
 * 32-bit sequence number to memory, then (if requested) poke
 * CPC_INT_STATUS to trigger the GENERIC2 interrupt.  64-bit sequence
 * numbers are not supported on this path.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6564
/* Emit a SWITCH_BUFFER packet (flip the CE/DE shadow buffer). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6570
/*
 * Emit a CONTEXT_CONTROL packet describing which state blocks the CP
 * should load for the next submission.  Under SR-IOV, CE metadata for
 * world-switch preemption is emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble
		 * is presented, even without a context switch
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6603
/*
 * Emit a COND_EXEC packet whose dword count is patched later by
 * gfx_v8_0_ring_emit_patch_cond_exec().  Returns the ring offset of the
 * placeholder count dword (written as the 0x55aa55aa marker).
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following commands if 0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patched with real count later */
	return ret;
}
6616
/*
 * Patch the COND_EXEC placeholder at @offset with the number of dwords
 * emitted since, accounting for ring wrap-around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the placeholder */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6630
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * slot reserved for virtualized register reads (virt.reg_val_offs).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6646
6647static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6648 uint32_t val)
6649{
6650 uint32_t cmd;
6651
6652 switch (ring->funcs->type) {
6653 case AMDGPU_RING_TYPE_GFX:
6654 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6655 break;
6656 case AMDGPU_RING_TYPE_KIQ:
6657 cmd = 1 << 16;
6658 break;
6659 default:
6660 cmd = WR_CONFIRM;
6661 break;
6662 }
6663
6664 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6665 amdgpu_ring_write(ring, cmd);
6666 amdgpu_ring_write(ring, reg);
6667 amdgpu_ring_write(ring, 0);
6668 amdgpu_ring_write(ring, val);
6669}
6670
/* Enable/disable the GFX ring end-of-pipe timestamp interrupt. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6677
6678static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6679 int me, int pipe,
6680 enum amdgpu_interrupt_state state)
6681{
6682 u32 mec_int_cntl, mec_int_cntl_reg;
6683
6684
6685
6686
6687
6688
6689
6690 if (me == 1) {
6691 switch (pipe) {
6692 case 0:
6693 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6694 break;
6695 case 1:
6696 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6697 break;
6698 case 2:
6699 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6700 break;
6701 case 3:
6702 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6703 break;
6704 default:
6705 DRM_DEBUG("invalid pipe %d\n", pipe);
6706 return;
6707 }
6708 } else {
6709 DRM_DEBUG("invalid me %d\n", me);
6710 return;
6711 }
6712
6713 switch (state) {
6714 case AMDGPU_IRQ_STATE_DISABLE:
6715 mec_int_cntl = RREG32(mec_int_cntl_reg);
6716 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6717 WREG32(mec_int_cntl_reg, mec_int_cntl);
6718 break;
6719 case AMDGPU_IRQ_STATE_ENABLE:
6720 mec_int_cntl = RREG32(mec_int_cntl_reg);
6721 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6722 WREG32(mec_int_cntl_reg, mec_int_cntl);
6723 break;
6724 default:
6725 break;
6726 }
6727}
6728
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6739
/* Enable/disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6750
/*
 * IRQ source .set callback: route an EOP interrupt enable/disable request
 * to the GFX ring or the matching MEC me/pipe.  Note that MEC2 requests
 * are forwarded but gfx_v8_0_set_compute_eop_interrupt_state() only
 * programs ME1 registers (the rest are owned by amdkfd).
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6789
/*
 * EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
 * run fence processing on the matching ring (GFX ring for ME0, the
 * matching compute ring for ME1/ME2).
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI. The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6821
/* Handle a privileged register access fault: log and schedule GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6830
/* Handle a privileged instruction fault: log and schedule GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6839
/*
 * Enable/disable the KIQ GENERIC2 interrupt, both in the global CPC
 * control and in the per-pipe control register for the pipe hosting the
 * KIQ ring (ME1 or ME2 register bank depending on ring->me).
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only supports the DRIVER0 type */
		break;
	}
	return 0;
}
6868
/*
 * KIQ GENERIC2 interrupt handler: decode the source for debugging and
 * run fence processing on the KIQ ring.
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
6885
/* IP-block level callbacks (init/teardown, power, reset, clockgating). */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6906
/* Ring callbacks for the GFX ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	/* worst-case dword counts per frame; each term below should
	 * correspond to one emit_* helper — verify against the emitters
	 * if any of them change */
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6950
/* Ring callbacks for user compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* worst-case dword counts; one term per emit_* helper below */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6980
/* Ring callbacks for the kernel interface queue (KIQ). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* worst-case dword counts; one term per emitted packet group */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7006
7007static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7008{
7009 int i;
7010
7011 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7012
7013 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7014 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7015
7016 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7017 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7018}
7019
/* IRQ source: CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7024
/* IRQ source: privileged register access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7029
/* IRQ source: privileged instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7034
/* IRQ source: KIQ GENERIC2 interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
7039
/* Wire up all GFX-related interrupt sources and their type counts. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
7054
/* Install the RLC helper table (shared iceland implementation). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7059
/*
 * Initialize GDS (global data share) sizing.  Total GDS size is read
 * from the hardware; GWS/OA counts are fixed.  Partition sizes between
 * gfx and CS users depend on whether the part has a 64KB GDS.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asci gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7087
/*
 * Write a user-requested CU disable bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 * A zero bitmap means "nothing to disable" and leaves the register alone.
 */
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
7101
/*
 * Return the active-CU bitmap for the currently selected SE/SH:
 * the complement of the fused plus user-disabled CUs, limited to
 * max_cu_per_sh bits.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
7113
/*
 * Populate adev->gfx.cu_info: per-SE/SH active CU bitmaps, total active
 * CU count, the always-on CU mask, and fixed per-CU capability numbers.
 * Walks every SE/SH under grbm_idx_mutex, applying any module-parameter
 * CU disable masks first.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs keep fewer CUs always-on than dGPUs */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* first ao_cu_num active CUs become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7169
/* GFX 8.0 IP block descriptor (Tonga/Fiji/Polaris class). */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7178
/* GFX 8.1 IP block descriptor (shares the 8.0 callback table). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7187
/*
 * Emit zeroed CE preemption metadata to the CSA via WRITE_DATA (CE
 * engine).  Payload layout and destination offset depend on whether the
 * hypervisor uses chained IBs.
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7216
/*
 * Emit DE preemption metadata to the CSA via WRITE_DATA (PFP engine).
 * The payload records the GDS backup address (CSA + 4KB); layout depends
 * on chained-IB support, mirroring gfx_v8_0_ring_emit_ce_meta().
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
7249