1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/firmware.h>
25#include <linux/module.h>
26
27#include "amdgpu.h"
28#include "amdgpu_ih.h"
29#include "amdgpu_gfx.h"
30#include "cikd.h"
31#include "cik.h"
32#include "cik_structs.h"
33#include "atom.h"
34#include "amdgpu_ucode.h"
35#include "clearstate_ci.h"
36
37#include "dce/dce_8_0_d.h"
38#include "dce/dce_8_0_sh_mask.h"
39
40#include "bif/bif_4_1_d.h"
41#include "bif/bif_4_1_sh_mask.h"
42
43#include "gca/gfx_7_0_d.h"
44#include "gca/gfx_7_2_enum.h"
45#include "gca/gfx_7_2_sh_mask.h"
46
47#include "gmc/gmc_7_0_d.h"
48#include "gmc/gmc_7_0_sh_mask.h"
49
50#include "oss/oss_2_0_d.h"
51#include "oss/oss_2_0_sh_mask.h"
52
/* NOTE(review): presumably the number of SIMD units per compute unit, going by the name. */
#define NUM_SIMD_PER_CU 0x4

/* NOTE(review): presumably the number of graphics rings set up for GFX7 - confirm at the use site. */
#define GFX7_NUM_GFX_RINGS 1
/* NOTE(review): presumably a MEC HPD buffer size; the unit is not visible here - confirm at the use site. */
#define GFX7_MEC_HPD_SIZE 2048

/* Forward declarations of file-local setup helpers defined further down in this file. */
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
61
/*
 * Firmware images this module may request, one group per ASIC handled by
 * gfx_v7_0_init_microcode(). The names follow the
 * "amdgpu/<chip>_<block>.bin" pattern that function builds; only kaveri
 * additionally lists an mec2 image.
 */
MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");

MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");

MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");

MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
MODULE_FIRMWARE("amdgpu/kabini_me.bin");
MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
MODULE_FIRMWARE("amdgpu/kabini_mec.bin");

MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
MODULE_FIRMWARE("amdgpu/mullins_me.bin");
MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
92
93static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
94{
95 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
96 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
97 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
98 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
99 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
100 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
101 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
102 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
103 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
104 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
105 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
106 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
107 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
108 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
109 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
110 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
111};
112
/*
 * "spectre" RLC save/restore register list.
 *
 * Layout as written here: most entries are a pair of words. The first word
 * is (selector << 16) | (address >> 2); the second is a zero word. Two bare
 * words (0x3 and 0x5) split the list into sections, and the five entries
 * after 0x5 have no accompanying zero word.
 *
 * NOTE(review): the address is presumably a register byte offset turned into
 * a dword index by the ">> 2". The meaning of the selector bits, of the zero
 * words and of the 0x3/0x5 words is defined by whatever consumes this table
 * and is not visible here - confirm before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
 (0x0e00 << 16) | (0xc12c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc140 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc150 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc15c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc168 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc170 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc178 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc204 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2b4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2b8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2bc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2c0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8228 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x829c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x869c >> 2),
 0x00000000,
 (0x0600 << 16) | (0x98f4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x98f8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9900 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc260 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x90e8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c000 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c00c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c1c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9700 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x8e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x9e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0xae00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0xbe00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x89bc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8900 >> 2),
 0x00000000,
 0x3,
 (0x0e00 << 16) | (0xc130 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc134 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc1fc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc208 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc264 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc268 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc26c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc270 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc274 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc278 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc27c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc280 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc284 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc288 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc28c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc290 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc294 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc298 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc29c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2a0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2a4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2a8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2ac >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2b0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x301d0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30238 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30250 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30254 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30258 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3025c >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x8e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x9e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0xae00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0xbe00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x8e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x9e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0xae00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0xbe00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x8e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x9e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0xae00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0xbe00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x8e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x9e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0xae00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0xbe00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x8e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x9e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0xae00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0xbe00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc99c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9834 >> 2),
 0x00000000,
 (0x0000 << 16) | (0x30f00 >> 2),
 0x00000000,
 (0x0001 << 16) | (0x30f00 >> 2),
 0x00000000,
 (0x0000 << 16) | (0x30f04 >> 2),
 0x00000000,
 (0x0001 << 16) | (0x30f04 >> 2),
 0x00000000,
 (0x0000 << 16) | (0x30f08 >> 2),
 0x00000000,
 (0x0001 << 16) | (0x30f08 >> 2),
 0x00000000,
 (0x0000 << 16) | (0x30f0c >> 2),
 0x00000000,
 (0x0001 << 16) | (0x30f0c >> 2),
 0x00000000,
 (0x0600 << 16) | (0x9b7c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8a14 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8a18 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a00 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8bf0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8bcc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8b24 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30a04 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a10 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a14 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a18 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a2c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc700 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc704 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc708 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc768 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc770 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc774 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc778 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc77c >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc780 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc784 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc788 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc78c >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc798 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc79c >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc7a0 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc7a4 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc7a8 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc7ac >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc7b0 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc7b4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9100 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c010 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92a8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92ac >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92b4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92b8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92bc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92c0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92c4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92c8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92cc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x92d0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c00 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c04 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c20 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c38 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c3c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xae00 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9604 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac08 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac0c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac10 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac14 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac58 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac68 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac6c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac70 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac74 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac78 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac7c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac80 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac84 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac88 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac8c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x970c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9714 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9718 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x971c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x8e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x9e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0xae00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0xbe00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xcd10 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xcd14 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88b0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88b4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88b8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88bc >> 2),
 0x00000000,
 (0x0400 << 16) | (0x89c0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88c4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88c8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88d0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88d4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88d8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8980 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30938 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3093c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30940 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x89a0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30900 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30904 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x89b4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c210 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c214 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c218 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8904 >> 2),
 0x00000000,
 0x5,
 (0x0e00 << 16) | (0x8c28 >> 2),
 (0x0e00 << 16) | (0x8c2c >> 2),
 (0x0e00 << 16) | (0x8c30 >> 2),
 (0x0e00 << 16) | (0x8c34 >> 2),
 (0x0e00 << 16) | (0x9600 >> 2),
};
559
/*
 * "kalindi" RLC save/restore register list.
 *
 * Layout as written here: most entries are a pair of words. The first word
 * is (selector << 16) | (address >> 2); the second is a zero word. Two bare
 * words (0x3 and 0x5) split the list into sections, and the five entries
 * after 0x5 have no accompanying zero word.
 *
 * NOTE(review): the address is presumably a register byte offset turned into
 * a dword index by the ">> 2". The meaning of the selector bits, of the zero
 * words and of the 0x3/0x5 words is defined by whatever consumes this table
 * and is not visible here - confirm before editing.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
 (0x0e00 << 16) | (0xc12c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc140 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc150 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc15c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc168 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc170 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc204 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2b4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2b8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2bc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2c0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8228 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x829c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x869c >> 2),
 0x00000000,
 (0x0600 << 16) | (0x98f4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x98f8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9900 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc260 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x90e8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c000 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c00c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c1c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9700 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xcd20 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x89bc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8900 >> 2),
 0x00000000,
 0x3,
 (0x0e00 << 16) | (0xc130 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc134 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc1fc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc208 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc264 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc268 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc26c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc270 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc274 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc28c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc290 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc294 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc298 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2a0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2a4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2a8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc2ac >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x301d0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30238 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30250 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30254 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30258 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3025c >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc900 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc904 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc908 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc90c >> 2),
 0x00000000,
 (0x4e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0xc910 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc99c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9834 >> 2),
 0x00000000,
 (0x0000 << 16) | (0x30f00 >> 2),
 0x00000000,
 (0x0000 << 16) | (0x30f04 >> 2),
 0x00000000,
 (0x0000 << 16) | (0x30f08 >> 2),
 0x00000000,
 (0x0000 << 16) | (0x30f0c >> 2),
 0x00000000,
 (0x0600 << 16) | (0x9b7c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8a14 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8a18 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a00 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8bf0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8bcc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8b24 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30a04 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a10 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a14 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a18 >> 2),
 0x00000000,
 (0x0600 << 16) | (0x30a2c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc700 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc704 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc708 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xc768 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc770 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc774 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc798 >> 2),
 0x00000000,
 (0x0400 << 16) | (0xc79c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9100 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c010 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c00 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c04 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c20 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c38 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8c3c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xae00 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9604 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac08 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac0c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac10 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac14 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac58 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac68 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac6c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac70 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac74 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac78 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac7c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac80 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac84 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac88 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xac8c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x970c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9714 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x9718 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x971c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x4e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x5e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x6e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x7e00 << 16) | (0x31068 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xcd10 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0xcd14 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88b0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88b4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88b8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88bc >> 2),
 0x00000000,
 (0x0400 << 16) | (0x89c0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88c4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88c8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88d0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88d4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x88d8 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8980 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30938 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3093c >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30940 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x89a0 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30900 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x30904 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x89b4 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3e1fc >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c210 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c214 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x3c218 >> 2),
 0x00000000,
 (0x0e00 << 16) | (0x8904 >> 2),
 0x00000000,
 0x5,
 (0x0e00 << 16) | (0x8c28 >> 2),
 (0x0e00 << 16) | (0x8c2c >> 2),
 (0x0e00 << 16) | (0x8c30 >> 2),
 (0x0e00 << 16) | (0x8c34 >> 2),
 (0x0e00 << 16) | (0x9600 >> 2),
};
884
/* Forward declarations of file-local helpers defined further down in this file. */
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
889
890
891
892
893
894
895
896
897
898
899
900
901
902static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
903{
904 const char *chip_name;
905 char fw_name[30];
906 int err;
907
908 DRM_DEBUG("\n");
909
910 switch (adev->asic_type) {
911 case CHIP_BONAIRE:
912 chip_name = "bonaire";
913 break;
914 case CHIP_HAWAII:
915 chip_name = "hawaii";
916 break;
917 case CHIP_KAVERI:
918 chip_name = "kaveri";
919 break;
920 case CHIP_KABINI:
921 chip_name = "kabini";
922 break;
923 case CHIP_MULLINS:
924 chip_name = "mullins";
925 break;
926 default: BUG();
927 }
928
929 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
930 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
931 if (err)
932 goto out;
933 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
934 if (err)
935 goto out;
936
937 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
938 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
939 if (err)
940 goto out;
941 err = amdgpu_ucode_validate(adev->gfx.me_fw);
942 if (err)
943 goto out;
944
945 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
946 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
947 if (err)
948 goto out;
949 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
950 if (err)
951 goto out;
952
953 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
954 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
955 if (err)
956 goto out;
957 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
958 if (err)
959 goto out;
960
961 if (adev->asic_type == CHIP_KAVERI) {
962 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
963 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
964 if (err)
965 goto out;
966 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
967 if (err)
968 goto out;
969 }
970
971 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
972 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
973 if (err)
974 goto out;
975 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
976
977out:
978 if (err) {
979 pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
980 release_firmware(adev->gfx.pfp_fw);
981 adev->gfx.pfp_fw = NULL;
982 release_firmware(adev->gfx.me_fw);
983 adev->gfx.me_fw = NULL;
984 release_firmware(adev->gfx.ce_fw);
985 adev->gfx.ce_fw = NULL;
986 release_firmware(adev->gfx.mec_fw);
987 adev->gfx.mec_fw = NULL;
988 release_firmware(adev->gfx.mec2_fw);
989 adev->gfx.mec2_fw = NULL;
990 release_firmware(adev->gfx.rlc_fw);
991 adev->gfx.rlc_fw = NULL;
992 }
993 return err;
994}
995
996static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
997{
998 release_firmware(adev->gfx.pfp_fw);
999 adev->gfx.pfp_fw = NULL;
1000 release_firmware(adev->gfx.me_fw);
1001 adev->gfx.me_fw = NULL;
1002 release_firmware(adev->gfx.ce_fw);
1003 adev->gfx.ce_fw = NULL;
1004 release_firmware(adev->gfx.mec_fw);
1005 adev->gfx.mec_fw = NULL;
1006 release_firmware(adev->gfx.mec2_fw);
1007 adev->gfx.mec2_fw = NULL;
1008 release_firmware(adev->gfx.rlc_fw);
1009 adev->gfx.rlc_fw = NULL;
1010}
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1024{
1025 const u32 num_tile_mode_states =
1026 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1027 const u32 num_secondary_tile_mode_states =
1028 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1029 u32 reg_offset, split_equal_to_row_size;
1030 uint32_t *tile, *macrotile;
1031
1032 tile = adev->gfx.config.tile_mode_array;
1033 macrotile = adev->gfx.config.macrotile_mode_array;
1034
1035 switch (adev->gfx.config.mem_row_size_in_kb) {
1036 case 1:
1037 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1038 break;
1039 case 2:
1040 default:
1041 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1042 break;
1043 case 4:
1044 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1045 break;
1046 }
1047
1048 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1049 tile[reg_offset] = 0;
1050 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1051 macrotile[reg_offset] = 0;
1052
1053 switch (adev->asic_type) {
1054 case CHIP_BONAIRE:
1055 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1056 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1057 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1058 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1059 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1060 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1061 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1063 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1064 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1065 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1067 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1068 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1069 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1070 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1071 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1072 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1073 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1074 TILE_SPLIT(split_equal_to_row_size));
1075 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1076 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1077 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1078 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1079 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1080 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1081 TILE_SPLIT(split_equal_to_row_size));
1082 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1083 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1084 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1085 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1087 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1088 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1089 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1090 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1092 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1093 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1094 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1096 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1097 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1098 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1100 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1101 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1104 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1105 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1106 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1108 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1109 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1110 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1112 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1113 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1114 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1115 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1117 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1118 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1119 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1120 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1121 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1122 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1124 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1125 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1126 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1128 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1129 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1130 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1132 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1133 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1134 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1135 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1137 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1138 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1139 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1141 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1142 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1145 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1146 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1148 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1149 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1150 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1152 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1156 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1157
1158 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1161 NUM_BANKS(ADDR_SURF_16_BANK));
1162 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1165 NUM_BANKS(ADDR_SURF_16_BANK));
1166 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1169 NUM_BANKS(ADDR_SURF_16_BANK));
1170 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1173 NUM_BANKS(ADDR_SURF_16_BANK));
1174 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1177 NUM_BANKS(ADDR_SURF_16_BANK));
1178 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1181 NUM_BANKS(ADDR_SURF_8_BANK));
1182 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1185 NUM_BANKS(ADDR_SURF_4_BANK));
1186 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1189 NUM_BANKS(ADDR_SURF_16_BANK));
1190 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1193 NUM_BANKS(ADDR_SURF_16_BANK));
1194 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1197 NUM_BANKS(ADDR_SURF_16_BANK));
1198 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1201 NUM_BANKS(ADDR_SURF_16_BANK));
1202 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1205 NUM_BANKS(ADDR_SURF_16_BANK));
1206 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1209 NUM_BANKS(ADDR_SURF_8_BANK));
1210 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1213 NUM_BANKS(ADDR_SURF_4_BANK));
1214
1215 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1216 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1217 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1218 if (reg_offset != 7)
1219 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1220 break;
1221 case CHIP_HAWAII:
1222 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1223 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1226 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1227 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1230 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1231 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1234 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1235 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1238 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1239 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1241 TILE_SPLIT(split_equal_to_row_size));
1242 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1243 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1245 TILE_SPLIT(split_equal_to_row_size));
1246 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1247 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1249 TILE_SPLIT(split_equal_to_row_size));
1250 tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1253 TILE_SPLIT(split_equal_to_row_size));
1254 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1256 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1257 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1258 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1259 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1260 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1261 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1263 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1264 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1267 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1268 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1269 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1271 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1272 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1274 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1275 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1278 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1282 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1283 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1286 tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1287 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1290 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1294 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1297 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1298 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1299 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1301 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1302 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1303 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1305 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1307 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1309 tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1310 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1311 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1313 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1317 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1319 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1321 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1325 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1327 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1328 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1330 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1332 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1334 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1336 tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1337 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1340
1341 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1344 NUM_BANKS(ADDR_SURF_16_BANK));
1345 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1348 NUM_BANKS(ADDR_SURF_16_BANK));
1349 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1352 NUM_BANKS(ADDR_SURF_16_BANK));
1353 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1356 NUM_BANKS(ADDR_SURF_16_BANK));
1357 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1360 NUM_BANKS(ADDR_SURF_8_BANK));
1361 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1364 NUM_BANKS(ADDR_SURF_4_BANK));
1365 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1368 NUM_BANKS(ADDR_SURF_4_BANK));
1369 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1372 NUM_BANKS(ADDR_SURF_16_BANK));
1373 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1376 NUM_BANKS(ADDR_SURF_16_BANK));
1377 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1380 NUM_BANKS(ADDR_SURF_16_BANK));
1381 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1384 NUM_BANKS(ADDR_SURF_8_BANK));
1385 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1388 NUM_BANKS(ADDR_SURF_16_BANK));
1389 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1392 NUM_BANKS(ADDR_SURF_8_BANK));
1393 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1396 NUM_BANKS(ADDR_SURF_4_BANK));
1397
1398 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1399 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1400 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1401 if (reg_offset != 7)
1402 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1403 break;
1404 case CHIP_KABINI:
1405 case CHIP_KAVERI:
1406 case CHIP_MULLINS:
1407 default:
1408 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1409 PIPE_CONFIG(ADDR_SURF_P2) |
1410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1412 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1413 PIPE_CONFIG(ADDR_SURF_P2) |
1414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1416 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1417 PIPE_CONFIG(ADDR_SURF_P2) |
1418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1420 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1421 PIPE_CONFIG(ADDR_SURF_P2) |
1422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1424 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1425 PIPE_CONFIG(ADDR_SURF_P2) |
1426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1427 TILE_SPLIT(split_equal_to_row_size));
1428 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1429 PIPE_CONFIG(ADDR_SURF_P2) |
1430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1431 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1432 PIPE_CONFIG(ADDR_SURF_P2) |
1433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1434 TILE_SPLIT(split_equal_to_row_size));
1435 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1436 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1437 PIPE_CONFIG(ADDR_SURF_P2));
1438 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1439 PIPE_CONFIG(ADDR_SURF_P2) |
1440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1441 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1442 PIPE_CONFIG(ADDR_SURF_P2) |
1443 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1445 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1446 PIPE_CONFIG(ADDR_SURF_P2) |
1447 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1449 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1450 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1451 PIPE_CONFIG(ADDR_SURF_P2) |
1452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1453 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1454 PIPE_CONFIG(ADDR_SURF_P2) |
1455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1457 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1458 PIPE_CONFIG(ADDR_SURF_P2) |
1459 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1461 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1462 PIPE_CONFIG(ADDR_SURF_P2) |
1463 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1465 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1466 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1467 PIPE_CONFIG(ADDR_SURF_P2) |
1468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1470 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1471 PIPE_CONFIG(ADDR_SURF_P2) |
1472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1473 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1474 PIPE_CONFIG(ADDR_SURF_P2) |
1475 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1477 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1478 PIPE_CONFIG(ADDR_SURF_P2) |
1479 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1481 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1482 PIPE_CONFIG(ADDR_SURF_P2) |
1483 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1485 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1486 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1487 PIPE_CONFIG(ADDR_SURF_P2) |
1488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1490 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1491 PIPE_CONFIG(ADDR_SURF_P2) |
1492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1494 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1495 PIPE_CONFIG(ADDR_SURF_P2) |
1496 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1498 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1499 PIPE_CONFIG(ADDR_SURF_P2) |
1500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1501 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1502 PIPE_CONFIG(ADDR_SURF_P2) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1505 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1506 PIPE_CONFIG(ADDR_SURF_P2) |
1507 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1509 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1510
1511 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1514 NUM_BANKS(ADDR_SURF_8_BANK));
1515 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1518 NUM_BANKS(ADDR_SURF_8_BANK));
1519 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1522 NUM_BANKS(ADDR_SURF_8_BANK));
1523 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1526 NUM_BANKS(ADDR_SURF_8_BANK));
1527 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1530 NUM_BANKS(ADDR_SURF_8_BANK));
1531 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1534 NUM_BANKS(ADDR_SURF_8_BANK));
1535 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1538 NUM_BANKS(ADDR_SURF_8_BANK));
1539 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1542 NUM_BANKS(ADDR_SURF_16_BANK));
1543 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1546 NUM_BANKS(ADDR_SURF_16_BANK));
1547 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1550 NUM_BANKS(ADDR_SURF_16_BANK));
1551 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1554 NUM_BANKS(ADDR_SURF_16_BANK));
1555 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1558 NUM_BANKS(ADDR_SURF_16_BANK));
1559 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1562 NUM_BANKS(ADDR_SURF_16_BANK));
1563 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1566 NUM_BANKS(ADDR_SURF_8_BANK));
1567
1568 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1569 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1570 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1571 if (reg_offset != 7)
1572 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1573 break;
1574 }
1575}
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
1589 u32 se_num, u32 sh_num, u32 instance)
1590{
1591 u32 data;
1592
1593 if (instance == 0xffffffff)
1594 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1595 else
1596 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1597
1598 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1599 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1600 GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1601 else if (se_num == 0xffffffff)
1602 data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1603 (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1604 else if (sh_num == 0xffffffff)
1605 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1606 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1607 else
1608 data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1609 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1610 WREG32(mmGRBM_GFX_INDEX, data);
1611}
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1622{
1623 u32 data, mask;
1624
1625 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1626 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1627
1628 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1629 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1630
1631 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1632 adev->gfx.config.max_sh_per_se);
1633
1634 return (~data) & mask;
1635}
1636
1637static void
1638gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
1639{
1640 switch (adev->asic_type) {
1641 case CHIP_BONAIRE:
1642 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
1643 SE_XSEL(1) | SE_YSEL(1);
1644 *rconf1 |= 0x0;
1645 break;
1646 case CHIP_HAWAII:
1647 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
1648 RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
1649 PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
1650 SE_YSEL(3);
1651 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
1652 SE_PAIR_YSEL(2);
1653 break;
1654 case CHIP_KAVERI:
1655 *rconf |= RB_MAP_PKR0(2);
1656 *rconf1 |= 0x0;
1657 break;
1658 case CHIP_KABINI:
1659 case CHIP_MULLINS:
1660 *rconf |= 0x0;
1661 *rconf1 |= 0x0;
1662 break;
1663 default:
1664 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
1665 break;
1666 }
1667}
1668
1669static void
1670gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1671 u32 raster_config, u32 raster_config_1,
1672 unsigned rb_mask, unsigned num_rb)
1673{
1674 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
1675 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
1676 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
1677 unsigned rb_per_se = num_rb / num_se;
1678 unsigned se_mask[4];
1679 unsigned se;
1680
1681 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
1682 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
1683 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
1684 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
1685
1686 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
1687 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
1688 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
1689
1690 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
1691 (!se_mask[2] && !se_mask[3]))) {
1692 raster_config_1 &= ~SE_PAIR_MAP_MASK;
1693
1694 if (!se_mask[0] && !se_mask[1]) {
1695 raster_config_1 |=
1696 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
1697 } else {
1698 raster_config_1 |=
1699 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
1700 }
1701 }
1702
1703 for (se = 0; se < num_se; se++) {
1704 unsigned raster_config_se = raster_config;
1705 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
1706 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
1707 int idx = (se / 2) * 2;
1708
1709 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
1710 raster_config_se &= ~SE_MAP_MASK;
1711
1712 if (!se_mask[idx]) {
1713 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
1714 } else {
1715 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
1716 }
1717 }
1718
1719 pkr0_mask &= rb_mask;
1720 pkr1_mask &= rb_mask;
1721 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
1722 raster_config_se &= ~PKR_MAP_MASK;
1723
1724 if (!pkr0_mask) {
1725 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
1726 } else {
1727 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
1728 }
1729 }
1730
1731 if (rb_per_se >= 2) {
1732 unsigned rb0_mask = 1 << (se * rb_per_se);
1733 unsigned rb1_mask = rb0_mask << 1;
1734
1735 rb0_mask &= rb_mask;
1736 rb1_mask &= rb_mask;
1737 if (!rb0_mask || !rb1_mask) {
1738 raster_config_se &= ~RB_MAP_PKR0_MASK;
1739
1740 if (!rb0_mask) {
1741 raster_config_se |=
1742 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
1743 } else {
1744 raster_config_se |=
1745 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
1746 }
1747 }
1748
1749 if (rb_per_se > 2) {
1750 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
1751 rb1_mask = rb0_mask << 1;
1752 rb0_mask &= rb_mask;
1753 rb1_mask &= rb_mask;
1754 if (!rb0_mask || !rb1_mask) {
1755 raster_config_se &= ~RB_MAP_PKR1_MASK;
1756
1757 if (!rb0_mask) {
1758 raster_config_se |=
1759 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
1760 } else {
1761 raster_config_se |=
1762 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
1763 }
1764 }
1765 }
1766 }
1767
1768
1769 gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
1770 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
1771 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1772 }
1773
1774
1775 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1776}
1777
1778
1779
1780
1781
1782
1783
1784
1785static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
1786{
1787 int i, j;
1788 u32 data;
1789 u32 raster_config = 0, raster_config_1 = 0;
1790 u32 active_rbs = 0;
1791 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1792 adev->gfx.config.max_sh_per_se;
1793 unsigned num_rb_pipes;
1794
1795 mutex_lock(&adev->grbm_idx_mutex);
1796 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1797 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1798 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1799 data = gfx_v7_0_get_rb_active_bitmap(adev);
1800 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1801 rb_bitmap_width_per_sh);
1802 }
1803 }
1804 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1805
1806 adev->gfx.config.backend_enable_mask = active_rbs;
1807 adev->gfx.config.num_rbs = hweight32(active_rbs);
1808
1809 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
1810 adev->gfx.config.max_shader_engines, 16);
1811
1812 gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
1813
1814 if (!adev->gfx.config.backend_enable_mask ||
1815 adev->gfx.config.num_rbs >= num_rb_pipes) {
1816 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
1817 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1818 } else {
1819 gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
1820 adev->gfx.config.backend_enable_mask,
1821 num_rb_pipes);
1822 }
1823
1824
1825 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1826 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1827 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1828 adev->gfx.config.rb_config[i][j].rb_backend_disable =
1829 RREG32(mmCC_RB_BACKEND_DISABLE);
1830 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
1831 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1832 adev->gfx.config.rb_config[i][j].raster_config =
1833 RREG32(mmPA_SC_RASTER_CONFIG);
1834 adev->gfx.config.rb_config[i][j].raster_config_1 =
1835 RREG32(mmPA_SC_RASTER_CONFIG_1);
1836 }
1837 }
1838 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1839 mutex_unlock(&adev->grbm_idx_mutex);
1840}
1841
1842#define DEFAULT_SH_MEM_BASES (0x6000)
1843
1844
1845
1846
1847
1848
1849
1850
1851static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
1852{
1853 int i;
1854 uint32_t sh_mem_config;
1855 uint32_t sh_mem_bases;
1856
1857
1858
1859
1860
1861
1862
1863 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1864 sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1865 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1866 sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
1867 mutex_lock(&adev->srbm_mutex);
1868 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1869 cik_srbm_select(adev, 0, 0, 0, i);
1870
1871 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
1872 WREG32(mmSH_MEM_APE1_BASE, 1);
1873 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1874 WREG32(mmSH_MEM_BASES, sh_mem_bases);
1875 }
1876 cik_srbm_select(adev, 0, 0, 0, 0);
1877 mutex_unlock(&adev->srbm_mutex);
1878
1879
1880
1881 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1882 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
1883 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
1884 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
1885 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
1886 }
1887}
1888
1889static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
1890{
1891 int vmid;
1892
1893
1894
1895
1896
1897
1898
1899 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
1900 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
1901 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
1902 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
1903 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
1904 }
1905}
1906
1907static void gfx_v7_0_config_init(struct amdgpu_device *adev)
1908{
1909 adev->gfx.config.double_offchip_lds_buf = 1;
1910}
1911
1912
1913
1914
1915
1916
1917
1918
1919
/*
 * gfx_v7_0_constants_init - program the static gfx register setup
 * @adev: amdgpu device
 *
 * Writes the address configuration, runs the tiling/RB/CU setup helpers,
 * programs the per-VMID SH_MEM_* registers and then a fixed sequence of
 * 3D-engine defaults.  The order of the register accesses below is kept
 * exactly as written.
 */
static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
{
	u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
	u32 tmp;
	int i;

	/* GRBM read timeout field set to 0xff */
	WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));

	/* the same cached address config value goes to GB, HDP and DMIF */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v7_0_tiling_mode_table_init(adev);

	gfx_v7_0_setup_rb(adev);
	gfx_v7_0_get_cu_info(adev);
	gfx_v7_0_config_init(adev);

	/* CP micro engine queue thresholds */
	WREG32(mmCP_MEQ_THRESHOLDS,
	       (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
	       (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));

	/* grbm_idx_mutex is held until the end of the register sequence */
	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * NOTE(review): all-ones arguments presumably select broadcast to
	 * every SE/SH/instance - confirm against gfx_v7_0_select_se_sh().
	 */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/*
	 * Build the SH_MEM_CONFIG value: unaligned access mode, NC default
	 * mtype, UC APE1 mtype, PRIVATE_ATC cleared.
	 */
	sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
				   SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
				   MTYPE_NC);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
				   MTYPE_UC);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);

	/* SH_STATIC_MEM_CONFIG: swizzle on, ELEMENT_SIZE=1, INDEX_STRIDE=3 */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	/*
	 * Program SH_MEM_* once per id of id_mgr[0]; the loop index is
	 * passed as the last cik_srbm_select() argument (presumably the
	 * VMID - confirm against cik_srbm_select()).
	 */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		/* index 0 gets base 0, the others the shared aperture */
		if (i == 0)
			sh_mem_base = 0;
		else
			sh_mem_base = adev->gmc.shared_aperture_start >> 48;
		cik_srbm_select(adev, 0, 0, 0, i);
		/* NOTE(review): base > limit presumably disables APE1 */
		WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_base);
	}
	/* restore the default SRBM selection before dropping the lock */
	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v7_0_init_compute_vmid(adev);
	gfx_v7_0_init_gds_vmid(adev);

	WREG32(mmSX_DEBUG_1, 0x20);

	WREG32(mmTA_CNTL_AUX, 0x00010000);

	/* read-modify-write: set bits 24 and 25 of SPI_CONFIG_CNTL */
	tmp = RREG32(mmSPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(mmSPI_CONFIG_CNTL, tmp);

	/* SQ_CONFIG is written as 1 here and as 0 further down */
	WREG32(mmSQ_CONFIG, 1);

	WREG32(mmDB_DEBUG, 0);

	tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(mmDB_DEBUG2, tmp);

	tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(mmDB_DEBUG3, tmp);

	tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(mmCB_HW_CONTROL, tmp);

	WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));

	/* scan converter FIFO sizes come from the cached gfx config */
	WREG32(mmPA_SC_FIFO_SIZE,
		((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));

	WREG32(mmVGT_NUM_INSTANCES, 1);

	WREG32(mmCP_PERFMON_CNTL, 0);

	WREG32(mmSQ_CONFIG, 0);

	WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
		((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
		(255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));

	WREG32(mmVGT_CACHE_INVALIDATION,
		(VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
		(ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));

	WREG32(mmVGT_GS_VERTEX_REUSE, 16);
	WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
			(3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
	WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);

	/* all four PIPE_ORDER_TSn fields of SPI_ARB_PRIORITY are set to 2 */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

	/* fixed 50us delay after the register sequence */
	udelay(50);
}
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
2066{
2067 adev->gfx.scratch.num_reg = 8;
2068 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
2069 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
2070}
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2083{
2084 struct amdgpu_device *adev = ring->adev;
2085 uint32_t scratch;
2086 uint32_t tmp = 0;
2087 unsigned i;
2088 int r;
2089
2090 r = amdgpu_gfx_scratch_get(adev, &scratch);
2091 if (r)
2092 return r;
2093
2094 WREG32(scratch, 0xCAFEDEAD);
2095 r = amdgpu_ring_alloc(ring, 3);
2096 if (r)
2097 goto error_free_scratch;
2098
2099 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2100 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2101 amdgpu_ring_write(ring, 0xDEADBEEF);
2102 amdgpu_ring_commit(ring);
2103
2104 for (i = 0; i < adev->usec_timeout; i++) {
2105 tmp = RREG32(scratch);
2106 if (tmp == 0xDEADBEEF)
2107 break;
2108 udelay(1);
2109 }
2110 if (i >= adev->usec_timeout)
2111 r = -ETIMEDOUT;
2112
2113error_free_scratch:
2114 amdgpu_gfx_scratch_free(adev, scratch);
2115 return r;
2116}
2117
2118
2119
2120
2121
2122
2123
2124
2125static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
2126{
2127 u32 ref_and_mask;
2128 int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
2129
2130 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
2131 switch (ring->me) {
2132 case 1:
2133 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
2134 break;
2135 case 2:
2136 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
2137 break;
2138 default:
2139 return;
2140 }
2141 } else {
2142 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
2143 }
2144
2145 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
2146 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) |
2147 WAIT_REG_MEM_FUNCTION(3) |
2148 WAIT_REG_MEM_ENGINE(usepfp)));
2149 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
2150 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
2151 amdgpu_ring_write(ring, ref_and_mask);
2152 amdgpu_ring_write(ring, ref_and_mask);
2153 amdgpu_ring_write(ring, 0x20);
2154}
2155
2156static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
2157{
2158 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2159 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
2160 EVENT_INDEX(4));
2161
2162 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2163 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
2164 EVENT_INDEX(0));
2165}
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
2179 u64 seq, unsigned flags)
2180{
2181 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2182 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2183
2184
2185
2186 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2187 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2188 EOP_TC_ACTION_EN |
2189 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2190 EVENT_INDEX(5)));
2191 amdgpu_ring_write(ring, addr & 0xfffffffc);
2192 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2193 DATA_SEL(1) | INT_SEL(0));
2194 amdgpu_ring_write(ring, lower_32_bits(seq - 1));
2195 amdgpu_ring_write(ring, upper_32_bits(seq - 1));
2196
2197
2198 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2199 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2200 EOP_TC_ACTION_EN |
2201 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2202 EVENT_INDEX(5)));
2203 amdgpu_ring_write(ring, addr & 0xfffffffc);
2204 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2205 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2206 amdgpu_ring_write(ring, lower_32_bits(seq));
2207 amdgpu_ring_write(ring, upper_32_bits(seq));
2208}
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2222 u64 addr, u64 seq,
2223 unsigned flags)
2224{
2225 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2226 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2227
2228
2229 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2230 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2231 EOP_TC_ACTION_EN |
2232 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2233 EVENT_INDEX(5)));
2234 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2235 amdgpu_ring_write(ring, addr & 0xfffffffc);
2236 amdgpu_ring_write(ring, upper_32_bits(addr));
2237 amdgpu_ring_write(ring, lower_32_bits(seq));
2238 amdgpu_ring_write(ring, upper_32_bits(seq));
2239}
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2259 struct amdgpu_job *job,
2260 struct amdgpu_ib *ib,
2261 uint32_t flags)
2262{
2263 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
2264 u32 header, control = 0;
2265
2266
2267 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2268 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2269 amdgpu_ring_write(ring, 0);
2270 }
2271
2272 if (ib->flags & AMDGPU_IB_FLAG_CE)
2273 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2274 else
2275 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2276
2277 control |= ib->length_dw | (vmid << 24);
2278
2279 amdgpu_ring_write(ring, header);
2280 amdgpu_ring_write(ring,
2281#ifdef __BIG_ENDIAN
2282 (2 << 0) |
2283#endif
2284 (ib->gpu_addr & 0xFFFFFFFC));
2285 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2286 amdgpu_ring_write(ring, control);
2287}
2288
2289static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2290 struct amdgpu_job *job,
2291 struct amdgpu_ib *ib,
2292 uint32_t flags)
2293{
2294 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
2295 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
2308 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2309 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
2310 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
2311 }
2312
2313 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
2314 amdgpu_ring_write(ring,
2315#ifdef __BIG_ENDIAN
2316 (2 << 0) |
2317#endif
2318 (ib->gpu_addr & 0xFFFFFFFC));
2319 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2320 amdgpu_ring_write(ring, control);
2321}
2322
2323static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
2324{
2325 uint32_t dw2 = 0;
2326
2327 dw2 |= 0x80000000;
2328 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2329 gfx_v7_0_ring_emit_vgt_flush(ring);
2330
2331 dw2 |= 0x8001;
2332
2333 dw2 |= 0x01000000;
2334
2335 dw2 |= 0x10002;
2336 }
2337
2338 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2339 amdgpu_ring_write(ring, dw2);
2340 amdgpu_ring_write(ring, 0);
2341}
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
2354{
2355 struct amdgpu_device *adev = ring->adev;
2356 struct amdgpu_ib ib;
2357 struct dma_fence *f = NULL;
2358 uint32_t scratch;
2359 uint32_t tmp = 0;
2360 long r;
2361
2362 r = amdgpu_gfx_scratch_get(adev, &scratch);
2363 if (r)
2364 return r;
2365
2366 WREG32(scratch, 0xCAFEDEAD);
2367 memset(&ib, 0, sizeof(ib));
2368 r = amdgpu_ib_get(adev, NULL, 256,
2369 AMDGPU_IB_POOL_DIRECT, &ib);
2370 if (r)
2371 goto err1;
2372
2373 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2374 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
2375 ib.ptr[2] = 0xDEADBEEF;
2376 ib.length_dw = 3;
2377
2378 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
2379 if (r)
2380 goto err2;
2381
2382 r = dma_fence_wait_timeout(f, false, timeout);
2383 if (r == 0) {
2384 r = -ETIMEDOUT;
2385 goto err2;
2386 } else if (r < 0) {
2387 goto err2;
2388 }
2389 tmp = RREG32(scratch);
2390 if (tmp == 0xDEADBEEF)
2391 r = 0;
2392 else
2393 r = -EINVAL;
2394
2395err2:
2396 amdgpu_ib_free(adev, &ib, NULL);
2397 dma_fence_put(f);
2398err1:
2399 amdgpu_gfx_scratch_free(adev, scratch);
2400 return r;
2401}
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2435{
2436 if (enable)
2437 WREG32(mmCP_ME_CNTL, 0);
2438 else
2439 WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK |
2440 CP_ME_CNTL__PFP_HALT_MASK |
2441 CP_ME_CNTL__CE_HALT_MASK));
2442 udelay(50);
2443}
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2454{
2455 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2456 const struct gfx_firmware_header_v1_0 *ce_hdr;
2457 const struct gfx_firmware_header_v1_0 *me_hdr;
2458 const __le32 *fw_data;
2459 unsigned i, fw_size;
2460
2461 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2462 return -EINVAL;
2463
2464 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
2465 ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
2466 me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
2467
2468 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2469 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2470 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2471 adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
2472 adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
2473 adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
2474 adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
2475 adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
2476 adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
2477
2478 gfx_v7_0_cp_gfx_enable(adev, false);
2479
2480
2481 fw_data = (const __le32 *)
2482 (adev->gfx.pfp_fw->data +
2483 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2484 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2485 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2486 for (i = 0; i < fw_size; i++)
2487 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2488 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2489
2490
2491 fw_data = (const __le32 *)
2492 (adev->gfx.ce_fw->data +
2493 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2494 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2495 WREG32(mmCP_CE_UCODE_ADDR, 0);
2496 for (i = 0; i < fw_size; i++)
2497 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2498 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2499
2500
2501 fw_data = (const __le32 *)
2502 (adev->gfx.me_fw->data +
2503 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2504 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2505 WREG32(mmCP_ME_RAM_WADDR, 0);
2506 for (i = 0; i < fw_size; i++)
2507 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2508 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2509
2510 return 0;
2511}
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
2523{
2524 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2525 const struct cs_section_def *sect = NULL;
2526 const struct cs_extent_def *ext = NULL;
2527 int r, i;
2528
2529
2530 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2531 WREG32(mmCP_ENDIAN_SWAP, 0);
2532 WREG32(mmCP_DEVICE_ID, 1);
2533
2534 gfx_v7_0_cp_gfx_enable(adev, true);
2535
2536 r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
2537 if (r) {
2538 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2539 return r;
2540 }
2541
2542
2543 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2544 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2545 amdgpu_ring_write(ring, 0x8000);
2546 amdgpu_ring_write(ring, 0x8000);
2547
2548
2549 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2550 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2551
2552 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2553 amdgpu_ring_write(ring, 0x80000000);
2554 amdgpu_ring_write(ring, 0x80000000);
2555
2556 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
2557 for (ext = sect->section; ext->extent != NULL; ++ext) {
2558 if (sect->id == SECT_CONTEXT) {
2559 amdgpu_ring_write(ring,
2560 PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
2561 amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2562 for (i = 0; i < ext->reg_count; i++)
2563 amdgpu_ring_write(ring, ext->extent[i]);
2564 }
2565 }
2566 }
2567
2568 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2569 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2570 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
2571 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
2572
2573 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2574 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2575
2576 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2577 amdgpu_ring_write(ring, 0);
2578
2579 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2580 amdgpu_ring_write(ring, 0x00000316);
2581 amdgpu_ring_write(ring, 0x0000000e);
2582 amdgpu_ring_write(ring, 0x00000010);
2583
2584 amdgpu_ring_commit(ring);
2585
2586 return 0;
2587}
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2599{
2600 struct amdgpu_ring *ring;
2601 u32 tmp;
2602 u32 rb_bufsz;
2603 u64 rb_addr, rptr_addr;
2604 int r;
2605
2606 WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
2607 if (adev->asic_type != CHIP_HAWAII)
2608 WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2609
2610
2611 WREG32(mmCP_RB_WPTR_DELAY, 0);
2612
2613
2614 WREG32(mmCP_RB_VMID, 0);
2615
2616 WREG32(mmSCRATCH_ADDR, 0);
2617
2618
2619
2620 ring = &adev->gfx.gfx_ring[0];
2621 rb_bufsz = order_base_2(ring->ring_size / 8);
2622 tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2623#ifdef __BIG_ENDIAN
2624 tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
2625#endif
2626 WREG32(mmCP_RB0_CNTL, tmp);
2627
2628
2629 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2630 ring->wptr = 0;
2631 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2632
2633
2634 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2635 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2636 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2637
2638
2639 WREG32(mmSCRATCH_UMSK, 0);
2640
2641 mdelay(1);
2642 WREG32(mmCP_RB0_CNTL, tmp);
2643
2644 rb_addr = ring->gpu_addr >> 8;
2645 WREG32(mmCP_RB0_BASE, rb_addr);
2646 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2647
2648
2649 gfx_v7_0_cp_gfx_start(adev);
2650 r = amdgpu_ring_test_helper(ring);
2651 if (r)
2652 return r;
2653
2654 return 0;
2655}
2656
2657static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
2658{
2659 return ring->adev->wb.wb[ring->rptr_offs];
2660}
2661
2662static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
2663{
2664 struct amdgpu_device *adev = ring->adev;
2665
2666 return RREG32(mmCP_RB0_WPTR);
2667}
2668
2669static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
2670{
2671 struct amdgpu_device *adev = ring->adev;
2672
2673 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2674 (void)RREG32(mmCP_RB0_WPTR);
2675}
2676
2677static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
2678{
2679
2680 return ring->adev->wb.wb[ring->wptr_offs];
2681}
2682
2683static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
2684{
2685 struct amdgpu_device *adev = ring->adev;
2686
2687
2688 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
2689 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2690}
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2701{
2702 if (enable)
2703 WREG32(mmCP_MEC_CNTL, 0);
2704 else
2705 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
2706 CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2707 udelay(50);
2708}
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2719{
2720 const struct gfx_firmware_header_v1_0 *mec_hdr;
2721 const __le32 *fw_data;
2722 unsigned i, fw_size;
2723
2724 if (!adev->gfx.mec_fw)
2725 return -EINVAL;
2726
2727 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2728 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2729 adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
2730 adev->gfx.mec_feature_version = le32_to_cpu(
2731 mec_hdr->ucode_feature_version);
2732
2733 gfx_v7_0_cp_compute_enable(adev, false);
2734
2735
2736 fw_data = (const __le32 *)
2737 (adev->gfx.mec_fw->data +
2738 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2739 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
2740 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2741 for (i = 0; i < fw_size; i++)
2742 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
2743 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2744
2745 if (adev->asic_type == CHIP_KAVERI) {
2746 const struct gfx_firmware_header_v1_0 *mec2_hdr;
2747
2748 if (!adev->gfx.mec2_fw)
2749 return -EINVAL;
2750
2751 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
2752 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
2753 adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
2754 adev->gfx.mec2_feature_version = le32_to_cpu(
2755 mec2_hdr->ucode_feature_version);
2756
2757
2758 fw_data = (const __le32 *)
2759 (adev->gfx.mec2_fw->data +
2760 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
2761 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
2762 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2763 for (i = 0; i < fw_size; i++)
2764 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
2765 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2766 }
2767
2768 return 0;
2769}
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
2780{
2781 int i;
2782
2783 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2784 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2785
2786 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
2787 }
2788}
2789
2790static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
2791{
2792 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
2793}
2794
2795static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
2796{
2797 int r;
2798 u32 *hpd;
2799 size_t mec_hpd_size;
2800
2801 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2802
2803
2804 amdgpu_gfx_compute_queue_acquire(adev);
2805
2806
2807 mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
2808 * GFX7_MEC_HPD_SIZE * 2;
2809
2810 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2811 AMDGPU_GEM_DOMAIN_VRAM,
2812 &adev->gfx.mec.hpd_eop_obj,
2813 &adev->gfx.mec.hpd_eop_gpu_addr,
2814 (void **)&hpd);
2815 if (r) {
2816 dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
2817 gfx_v7_0_mec_fini(adev);
2818 return r;
2819 }
2820
2821
2822 memset(hpd, 0, mec_hpd_size);
2823
2824 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2825 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2826
2827 return 0;
2828}
2829
/*
 * struct hqd_registers - snapshot layout for the per-queue CP registers
 *
 * One u32 per register; each field is named after the CP_MQD_* or
 * CP_HQD_* register it mirrors.
 * NOTE(review): no user of this struct is visible in this part of the
 * file - confirm it is still referenced before relying on its layout.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
2868
2869static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
2870 int mec, int pipe)
2871{
2872 u64 eop_gpu_addr;
2873 u32 tmp;
2874 size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
2875 * GFX7_MEC_HPD_SIZE * 2;
2876
2877 mutex_lock(&adev->srbm_mutex);
2878 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
2879
2880 cik_srbm_select(adev, mec + 1, pipe, 0, 0);
2881
2882
2883 WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2884 WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2885
2886
2887 WREG32(mmCP_HPD_EOP_VMID, 0);
2888
2889
2890 tmp = RREG32(mmCP_HPD_EOP_CONTROL);
2891 tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
2892 tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
2893 WREG32(mmCP_HPD_EOP_CONTROL, tmp);
2894
2895 cik_srbm_select(adev, 0, 0, 0, 0);
2896 mutex_unlock(&adev->srbm_mutex);
2897}
2898
2899static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
2900{
2901 int i;
2902
2903
2904 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
2905 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
2906 for (i = 0; i < adev->usec_timeout; i++) {
2907 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
2908 break;
2909 udelay(1);
2910 }
2911
2912 if (i == adev->usec_timeout)
2913 return -ETIMEDOUT;
2914
2915 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
2916 WREG32(mmCP_HQD_PQ_RPTR, 0);
2917 WREG32(mmCP_HQD_PQ_WPTR, 0);
2918 }
2919
2920 return 0;
2921}
2922
/*
 * gfx_v7_0_mqd_init - fill in the memory queue descriptor of a compute ring
 * @adev: amdgpu device
 * @mqd: CPU pointer to the MQD to initialise
 * @mqd_gpu_addr: GPU address of @mqd
 * @ring: compute ring the MQD describes
 *
 * Zeroes @mqd and fills it from @ring plus the current values of several
 * CP_HQD_* registers of the queue the caller has selected.  Also resets
 * ring->wptr to 0.  No register is written here.
 */
static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
			      struct cik_mqd *mqd,
			      uint64_t mqd_gpu_addr,
			      struct amdgpu_ring *ring)
{
	u64 hqd_gpu_addr;
	u64 wb_gpu_addr;

	/* start from an all-zero descriptor */
	memset(mqd, 0, sizeof(struct cik_mqd));

	mqd->header = 0xC0310800;
	/* all bits set in the static thread management masks of SE0..SE3 */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;

	/*
	 * Doorbell enable bit follows ring->use_doorbell.
	 * NOTE(review): this value is overwritten by the second doorbell
	 * block further down; only the register read is kept from here.
	 */
	mqd->cp_hqd_pq_doorbell_control =
		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
	if (ring->use_doorbell)
		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
	else
		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;

	/* GPU address of the MQD itself */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* current CP_MQD_CONTROL with the VMID field cleared */
	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;

	/* ring buffer base, in units of 256 bytes */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* queue control: replace the queue size and rptr block size fields */
	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
	mqd->cp_hqd_pq_control &=
		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);

	mqd->cp_hqd_pq_control |=
		order_base_2(ring->ring_size / 8);
	mqd->cp_hqd_pq_control |=
		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
	mqd->cp_hqd_pq_control |=
		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
#endif
	/* clear UNORD_DISPATCH/ROQ_PQ_IB_FLIP/PQ_VOLATILE, set PRIV_STATE/KMD_QUEUE */
	mqd->cp_hqd_pq_control &=
		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
	mqd->cp_hqd_pq_control |=
		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK;

	/* wptr poll address: the ring's wptr slot in the write-back buffer */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* rptr report address: the ring's rptr slot in the write-back buffer */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/*
	 * Final doorbell control value: with a doorbell, take the register
	 * value, install the ring's doorbell offset, set the enable bit and
	 * clear the SOURCE and HIT bits; without one the field is zero.
	 */
	if (ring->use_doorbell) {
		mqd->cp_hqd_pq_doorbell_control =
			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		mqd->cp_hqd_pq_doorbell_control &=
			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
		mqd->cp_hqd_pq_doorbell_control |=
			(ring->doorbell_index <<
			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
		mqd->cp_hqd_pq_doorbell_control |=
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
		mqd->cp_hqd_pq_doorbell_control &=
			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);

	} else {
		mqd->cp_hqd_pq_doorbell_control = 0;
	}

	/* write pointer starts at 0; read pointer is taken from the register */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* queue VMID 0 */
	mqd->cp_hqd_vmid = 0;

	/*
	 * Remaining fields are copied from the current register values.
	 * NOTE(review): CP_HQD_PQ_RPTR is read a second time below.
	 */
	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);

	/* mark the queue active in the descriptor */
	mqd->cp_hqd_active = 1;
}
3042
3043static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
3044{
3045 uint32_t tmp;
3046 uint32_t mqd_reg;
3047 uint32_t *mqd_data;
3048
3049
3050 mqd_data = &mqd->cp_mqd_base_addr_lo;
3051
3052
3053 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3054 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3055 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3056
3057
3058 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
3059 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
3060
3061
3062 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
3063 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
3064
3065 return 0;
3066}
3067
3068static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
3069{
3070 int r;
3071 u64 mqd_gpu_addr;
3072 struct cik_mqd *mqd;
3073 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
3074
3075 r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
3076 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
3077 &mqd_gpu_addr, (void **)&mqd);
3078 if (r) {
3079 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3080 return r;
3081 }
3082
3083 mutex_lock(&adev->srbm_mutex);
3084 cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3085
3086 gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
3087 gfx_v7_0_mqd_deactivate(adev);
3088 gfx_v7_0_mqd_commit(adev, mqd);
3089
3090 cik_srbm_select(adev, 0, 0, 0, 0);
3091 mutex_unlock(&adev->srbm_mutex);
3092
3093 amdgpu_bo_kunmap(ring->mqd_obj);
3094 amdgpu_bo_unreserve(ring->mqd_obj);
3095 return 0;
3096}
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
3108{
3109 int r, i, j;
3110 u32 tmp;
3111 struct amdgpu_ring *ring;
3112
3113
3114 tmp = RREG32(mmCP_CPF_DEBUG);
3115 tmp |= (1 << 23);
3116 WREG32(mmCP_CPF_DEBUG, tmp);
3117
3118
3119 for (i = 0; i < adev->gfx.mec.num_mec; i++)
3120 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
3121 gfx_v7_0_compute_pipe_init(adev, i, j);
3122
3123
3124 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3125 r = gfx_v7_0_compute_queue_init(adev, i);
3126 if (r) {
3127 gfx_v7_0_cp_compute_fini(adev);
3128 return r;
3129 }
3130 }
3131
3132 gfx_v7_0_cp_compute_enable(adev, true);
3133
3134 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3135 ring = &adev->gfx.compute_ring[i];
3136 amdgpu_ring_test_helper(ring);
3137 }
3138
3139 return 0;
3140}
3141
3142static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
3143{
3144 gfx_v7_0_cp_gfx_enable(adev, enable);
3145 gfx_v7_0_cp_compute_enable(adev, enable);
3146}
3147
/*
 * gfx_v7_0_cp_load_microcode - load gfx and compute CP microcode
 * @adev: amdgpu device
 *
 * Loads the gfx CP microcode and, only if that succeeded, the compute CP
 * microcode.  Returns 0 on success or the first error encountered.
 */
static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
{
	int err = gfx_v7_0_cp_gfx_load_microcode(adev);

	if (!err)
		err = gfx_v7_0_cp_compute_load_microcode(adev);

	return err;
}
3161
3162static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3163 bool enable)
3164{
3165 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3166
3167 if (enable)
3168 tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3169 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3170 else
3171 tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3172 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3173 WREG32(mmCP_INT_CNTL_RING0, tmp);
3174}
3175
3176static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
3177{
3178 int r;
3179
3180 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3181
3182 r = gfx_v7_0_cp_load_microcode(adev);
3183 if (r)
3184 return r;
3185
3186 r = gfx_v7_0_cp_gfx_resume(adev);
3187 if (r)
3188 return r;
3189 r = gfx_v7_0_cp_compute_resume(adev);
3190 if (r)
3191 return r;
3192
3193 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3194
3195 return 0;
3196}
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3207{
3208 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3209 uint32_t seq = ring->fence_drv.sync_seq;
3210 uint64_t addr = ring->fence_drv.gpu_addr;
3211
3212 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3213 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) |
3214 WAIT_REG_MEM_FUNCTION(3) |
3215 WAIT_REG_MEM_ENGINE(usepfp)));
3216 amdgpu_ring_write(ring, addr & 0xfffffffc);
3217 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3218 amdgpu_ring_write(ring, seq);
3219 amdgpu_ring_write(ring, 0xffffffff);
3220 amdgpu_ring_write(ring, 4);
3221
3222 if (usepfp) {
3223
3224 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3225 amdgpu_ring_write(ring, 0);
3226 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3227 amdgpu_ring_write(ring, 0);
3228 }
3229}
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3248 unsigned vmid, uint64_t pd_addr)
3249{
3250 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3251
3252 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3253
3254
3255 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3256 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |
3257 WAIT_REG_MEM_FUNCTION(0) |
3258 WAIT_REG_MEM_ENGINE(0)));
3259 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3260 amdgpu_ring_write(ring, 0);
3261 amdgpu_ring_write(ring, 0);
3262 amdgpu_ring_write(ring, 0);
3263 amdgpu_ring_write(ring, 0x20);
3264
3265
3266 if (usepfp) {
3267
3268 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3269 amdgpu_ring_write(ring, 0x0);
3270
3271
3272 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3273 amdgpu_ring_write(ring, 0);
3274 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3275 amdgpu_ring_write(ring, 0);
3276 }
3277}
3278
3279static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
3280 uint32_t reg, uint32_t val)
3281{
3282 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3283
3284 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3285 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3286 WRITE_DATA_DST_SEL(0)));
3287 amdgpu_ring_write(ring, reg);
3288 amdgpu_ring_write(ring, 0);
3289 amdgpu_ring_write(ring, val);
3290}
3291
3292
3293
3294
3295
3296
3297static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
3298{
3299 const u32 *src_ptr;
3300 u32 dws;
3301 const struct cs_section_def *cs_data;
3302 int r;
3303
3304
3305 if (adev->flags & AMD_IS_APU) {
3306 if (adev->asic_type == CHIP_KAVERI) {
3307 adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
3308 adev->gfx.rlc.reg_list_size =
3309 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
3310 } else {
3311 adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
3312 adev->gfx.rlc.reg_list_size =
3313 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
3314 }
3315 }
3316 adev->gfx.rlc.cs_data = ci_cs_data;
3317 adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048);
3318 adev->gfx.rlc.cp_table_size += 64 * 1024;
3319
3320 src_ptr = adev->gfx.rlc.reg_list;
3321 dws = adev->gfx.rlc.reg_list_size;
3322 dws += (5 * 16) + 48 + 48 + 64;
3323
3324 cs_data = adev->gfx.rlc.cs_data;
3325
3326 if (src_ptr) {
3327
3328 r = amdgpu_gfx_rlc_init_sr(adev, dws);
3329 if (r)
3330 return r;
3331 }
3332
3333 if (cs_data) {
3334
3335 r = amdgpu_gfx_rlc_init_csb(adev);
3336 if (r)
3337 return r;
3338 }
3339
3340 if (adev->gfx.rlc.cp_table_size) {
3341 r = amdgpu_gfx_rlc_init_cpt(adev);
3342 if (r)
3343 return r;
3344 }
3345
3346
3347 if (adev->gfx.rlc.funcs->update_spm_vmid)
3348 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
3349
3350 return 0;
3351}
3352
3353static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3354{
3355 u32 tmp;
3356
3357 tmp = RREG32(mmRLC_LB_CNTL);
3358 if (enable)
3359 tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3360 else
3361 tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3362 WREG32(mmRLC_LB_CNTL, tmp);
3363}
3364
3365static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3366{
3367 u32 i, j, k;
3368 u32 mask;
3369
3370 mutex_lock(&adev->grbm_idx_mutex);
3371 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3372 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3373 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
3374 for (k = 0; k < adev->usec_timeout; k++) {
3375 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3376 break;
3377 udelay(1);
3378 }
3379 }
3380 }
3381 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3382 mutex_unlock(&adev->grbm_idx_mutex);
3383
3384 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3385 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3386 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3387 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3388 for (k = 0; k < adev->usec_timeout; k++) {
3389 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3390 break;
3391 udelay(1);
3392 }
3393}
3394
3395static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3396{
3397 u32 tmp;
3398
3399 tmp = RREG32(mmRLC_CNTL);
3400 if (tmp != rlc)
3401 WREG32(mmRLC_CNTL, rlc);
3402}
3403
3404static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
3405{
3406 u32 data, orig;
3407
3408 orig = data = RREG32(mmRLC_CNTL);
3409
3410 if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
3411 u32 i;
3412
3413 data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
3414 WREG32(mmRLC_CNTL, data);
3415
3416 for (i = 0; i < adev->usec_timeout; i++) {
3417 if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
3418 break;
3419 udelay(1);
3420 }
3421
3422 gfx_v7_0_wait_for_rlc_serdes(adev);
3423 }
3424
3425 return orig;
3426}
3427
3428static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
3429{
3430 return true;
3431}
3432
3433static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
3434{
3435 u32 tmp, i, mask;
3436
3437 tmp = 0x1 | (1 << 1);
3438 WREG32(mmRLC_GPR_REG2, tmp);
3439
3440 mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
3441 RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
3442 for (i = 0; i < adev->usec_timeout; i++) {
3443 if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
3444 break;
3445 udelay(1);
3446 }
3447
3448 for (i = 0; i < adev->usec_timeout; i++) {
3449 if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
3450 break;
3451 udelay(1);
3452 }
3453}
3454
3455static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
3456{
3457 u32 tmp;
3458
3459 tmp = 0x1 | (0 << 1);
3460 WREG32(mmRLC_GPR_REG2, tmp);
3461}
3462
3463
3464
3465
3466
3467
3468
3469
3470static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
3471{
3472 WREG32(mmRLC_CNTL, 0);
3473
3474 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3475
3476 gfx_v7_0_wait_for_rlc_serdes(adev);
3477}
3478
3479
3480
3481
3482
3483
3484
3485
3486static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
3487{
3488 WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
3489
3490 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3491
3492 udelay(50);
3493}
3494
3495static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
3496{
3497 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3498
3499 tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3500 WREG32(mmGRBM_SOFT_RESET, tmp);
3501 udelay(50);
3502 tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3503 WREG32(mmGRBM_SOFT_RESET, tmp);
3504 udelay(50);
3505}
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
3517{
3518 const struct rlc_firmware_header_v1_0 *hdr;
3519 const __le32 *fw_data;
3520 unsigned i, fw_size;
3521 u32 tmp;
3522
3523 if (!adev->gfx.rlc_fw)
3524 return -EINVAL;
3525
3526 hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
3527 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3528 adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
3529 adev->gfx.rlc_feature_version = le32_to_cpu(
3530 hdr->ucode_feature_version);
3531
3532 adev->gfx.rlc.funcs->stop(adev);
3533
3534
3535 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3536 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
3537
3538 adev->gfx.rlc.funcs->reset(adev);
3539
3540 gfx_v7_0_init_pg(adev);
3541
3542 WREG32(mmRLC_LB_CNTR_INIT, 0);
3543 WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
3544
3545 mutex_lock(&adev->grbm_idx_mutex);
3546 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3547 WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
3548 WREG32(mmRLC_LB_PARAMS, 0x00600408);
3549 WREG32(mmRLC_LB_CNTL, 0x80000004);
3550 mutex_unlock(&adev->grbm_idx_mutex);
3551
3552 WREG32(mmRLC_MC_CNTL, 0);
3553 WREG32(mmRLC_UCODE_CNTL, 0);
3554
3555 fw_data = (const __le32 *)
3556 (adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3557 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3558 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3559 for (i = 0; i < fw_size; i++)
3560 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3561 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3562
3563
3564 gfx_v7_0_enable_lbpw(adev, false);
3565
3566 if (adev->asic_type == CHIP_BONAIRE)
3567 WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
3568
3569 adev->gfx.rlc.funcs->start(adev);
3570
3571 return 0;
3572}
3573
3574static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
3575{
3576 u32 data;
3577
3578 data = RREG32(mmRLC_SPM_VMID);
3579
3580 data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
3581 data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
3582
3583 WREG32(mmRLC_SPM_VMID, data);
3584}
3585
3586static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
3587{
3588 u32 data, orig, tmp, tmp2;
3589
3590 orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
3591
3592 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
3593 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3594
3595 tmp = gfx_v7_0_halt_rlc(adev);
3596
3597 mutex_lock(&adev->grbm_idx_mutex);
3598 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3599 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3600 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3601 tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
3602 RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
3603 RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
3604 WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
3605 mutex_unlock(&adev->grbm_idx_mutex);
3606
3607 gfx_v7_0_update_rlc(adev, tmp);
3608
3609 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3610 if (orig != data)
3611 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
3612
3613 } else {
3614 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3615
3616 RREG32(mmCB_CGTT_SCLK_CTRL);
3617 RREG32(mmCB_CGTT_SCLK_CTRL);
3618 RREG32(mmCB_CGTT_SCLK_CTRL);
3619 RREG32(mmCB_CGTT_SCLK_CTRL);
3620
3621 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
3622 if (orig != data)
3623 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
3624
3625 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3626 }
3627}
3628
3629static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
3630{
3631 u32 data, orig, tmp = 0;
3632
3633 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3634 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3635 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
3636 orig = data = RREG32(mmCP_MEM_SLP_CNTL);
3637 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3638 if (orig != data)
3639 WREG32(mmCP_MEM_SLP_CNTL, data);
3640 }
3641 }
3642
3643 orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
3644 data |= 0x00000001;
3645 data &= 0xfffffffd;
3646 if (orig != data)
3647 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
3648
3649 tmp = gfx_v7_0_halt_rlc(adev);
3650
3651 mutex_lock(&adev->grbm_idx_mutex);
3652 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3653 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3654 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3655 data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
3656 RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
3657 WREG32(mmRLC_SERDES_WR_CTRL, data);
3658 mutex_unlock(&adev->grbm_idx_mutex);
3659
3660 gfx_v7_0_update_rlc(adev, tmp);
3661
3662 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
3663 orig = data = RREG32(mmCGTS_SM_CTRL_REG);
3664 data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
3665 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
3666 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
3667 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
3668 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
3669 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
3670 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
3671 data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
3672 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
3673 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
3674 if (orig != data)
3675 WREG32(mmCGTS_SM_CTRL_REG, data);
3676 }
3677 } else {
3678 orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
3679 data |= 0x00000003;
3680 if (orig != data)
3681 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
3682
3683 data = RREG32(mmRLC_MEM_SLP_CNTL);
3684 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
3685 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3686 WREG32(mmRLC_MEM_SLP_CNTL, data);
3687 }
3688
3689 data = RREG32(mmCP_MEM_SLP_CNTL);
3690 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
3691 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3692 WREG32(mmCP_MEM_SLP_CNTL, data);
3693 }
3694
3695 orig = data = RREG32(mmCGTS_SM_CTRL_REG);
3696 data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
3697 if (orig != data)
3698 WREG32(mmCGTS_SM_CTRL_REG, data);
3699
3700 tmp = gfx_v7_0_halt_rlc(adev);
3701
3702 mutex_lock(&adev->grbm_idx_mutex);
3703 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3704 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3705 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3706 data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
3707 WREG32(mmRLC_SERDES_WR_CTRL, data);
3708 mutex_unlock(&adev->grbm_idx_mutex);
3709
3710 gfx_v7_0_update_rlc(adev, tmp);
3711 }
3712}
3713
3714static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
3715 bool enable)
3716{
3717 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3718
3719 if (enable) {
3720 gfx_v7_0_enable_mgcg(adev, true);
3721 gfx_v7_0_enable_cgcg(adev, true);
3722 } else {
3723 gfx_v7_0_enable_cgcg(adev, false);
3724 gfx_v7_0_enable_mgcg(adev, false);
3725 }
3726 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3727}
3728
3729static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
3730 bool enable)
3731{
3732 u32 data, orig;
3733
3734 orig = data = RREG32(mmRLC_PG_CNTL);
3735 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3736 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3737 else
3738 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3739 if (orig != data)
3740 WREG32(mmRLC_PG_CNTL, data);
3741}
3742
3743static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
3744 bool enable)
3745{
3746 u32 data, orig;
3747
3748 orig = data = RREG32(mmRLC_PG_CNTL);
3749 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3750 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3751 else
3752 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3753 if (orig != data)
3754 WREG32(mmRLC_PG_CNTL, data);
3755}
3756
3757static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
3758{
3759 u32 data, orig;
3760
3761 orig = data = RREG32(mmRLC_PG_CNTL);
3762 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
3763 data &= ~0x8000;
3764 else
3765 data |= 0x8000;
3766 if (orig != data)
3767 WREG32(mmRLC_PG_CNTL, data);
3768}
3769
3770static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
3771{
3772 u32 data, orig;
3773
3774 orig = data = RREG32(mmRLC_PG_CNTL);
3775 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
3776 data &= ~0x2000;
3777 else
3778 data |= 0x2000;
3779 if (orig != data)
3780 WREG32(mmRLC_PG_CNTL, data);
3781}
3782
3783static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
3784{
3785 if (adev->asic_type == CHIP_KAVERI)
3786 return 5;
3787 else
3788 return 4;
3789}
3790
3791static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
3792 bool enable)
3793{
3794 u32 data, orig;
3795
3796 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
3797 orig = data = RREG32(mmRLC_PG_CNTL);
3798 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3799 if (orig != data)
3800 WREG32(mmRLC_PG_CNTL, data);
3801
3802 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3803 data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3804 if (orig != data)
3805 WREG32(mmRLC_AUTO_PG_CTRL, data);
3806 } else {
3807 orig = data = RREG32(mmRLC_PG_CNTL);
3808 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3809 if (orig != data)
3810 WREG32(mmRLC_PG_CNTL, data);
3811
3812 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3813 data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3814 if (orig != data)
3815 WREG32(mmRLC_AUTO_PG_CTRL, data);
3816
3817 data = RREG32(mmDB_RENDER_CONTROL);
3818 }
3819}
3820
3821static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
3822 u32 bitmap)
3823{
3824 u32 data;
3825
3826 if (!bitmap)
3827 return;
3828
3829 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3830 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3831
3832 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
3833}
3834
3835static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3836{
3837 u32 data, mask;
3838
3839 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
3840 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
3841
3842 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3843 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3844
3845 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
3846
3847 return (~data) & mask;
3848}
3849
3850static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
3851{
3852 u32 tmp;
3853
3854 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3855
3856 tmp = RREG32(mmRLC_MAX_PG_CU);
3857 tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
3858 tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
3859 WREG32(mmRLC_MAX_PG_CU, tmp);
3860}
3861
3862static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
3863 bool enable)
3864{
3865 u32 data, orig;
3866
3867 orig = data = RREG32(mmRLC_PG_CNTL);
3868 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
3869 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3870 else
3871 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3872 if (orig != data)
3873 WREG32(mmRLC_PG_CNTL, data);
3874}
3875
3876static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
3877 bool enable)
3878{
3879 u32 data, orig;
3880
3881 orig = data = RREG32(mmRLC_PG_CNTL);
3882 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
3883 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3884 else
3885 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3886 if (orig != data)
3887 WREG32(mmRLC_PG_CNTL, data);
3888}
3889
3890#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
3891#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
3892
3893static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
3894{
3895 u32 data, orig;
3896 u32 i;
3897
3898 if (adev->gfx.rlc.cs_data) {
3899 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
3900 WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
3901 WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
3902 WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
3903 } else {
3904 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
3905 for (i = 0; i < 3; i++)
3906 WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
3907 }
3908 if (adev->gfx.rlc.reg_list) {
3909 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
3910 for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
3911 WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
3912 }
3913
3914 orig = data = RREG32(mmRLC_PG_CNTL);
3915 data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
3916 if (orig != data)
3917 WREG32(mmRLC_PG_CNTL, data);
3918
3919 WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
3920 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3921
3922 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3923 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3924 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3925 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3926
3927 data = 0x10101010;
3928 WREG32(mmRLC_PG_DELAY, data);
3929
3930 data = RREG32(mmRLC_PG_DELAY_2);
3931 data &= ~0xff;
3932 data |= 0x3;
3933 WREG32(mmRLC_PG_DELAY_2, data);
3934
3935 data = RREG32(mmRLC_AUTO_PG_CTRL);
3936 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3937 data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3938 WREG32(mmRLC_AUTO_PG_CTRL, data);
3939
3940}
3941
3942static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
3943{
3944 gfx_v7_0_enable_gfx_cgpg(adev, enable);
3945 gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
3946 gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
3947}
3948
3949static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
3950{
3951 u32 count = 0;
3952 const struct cs_section_def *sect = NULL;
3953 const struct cs_extent_def *ext = NULL;
3954
3955 if (adev->gfx.rlc.cs_data == NULL)
3956 return 0;
3957
3958
3959 count += 2;
3960
3961 count += 3;
3962
3963 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
3964 for (ext = sect->section; ext->extent != NULL; ++ext) {
3965 if (sect->id == SECT_CONTEXT)
3966 count += 2 + ext->reg_count;
3967 else
3968 return 0;
3969 }
3970 }
3971
3972 count += 4;
3973
3974 count += 2;
3975
3976 count += 2;
3977
3978 return count;
3979}
3980
3981static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
3982 volatile u32 *buffer)
3983{
3984 u32 count = 0, i;
3985 const struct cs_section_def *sect = NULL;
3986 const struct cs_extent_def *ext = NULL;
3987
3988 if (adev->gfx.rlc.cs_data == NULL)
3989 return;
3990 if (buffer == NULL)
3991 return;
3992
3993 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3994 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3995
3996 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3997 buffer[count++] = cpu_to_le32(0x80000000);
3998 buffer[count++] = cpu_to_le32(0x80000000);
3999
4000 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4001 for (ext = sect->section; ext->extent != NULL; ++ext) {
4002 if (sect->id == SECT_CONTEXT) {
4003 buffer[count++] =
4004 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
4005 buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4006 for (i = 0; i < ext->reg_count; i++)
4007 buffer[count++] = cpu_to_le32(ext->extent[i]);
4008 } else {
4009 return;
4010 }
4011 }
4012 }
4013
4014 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4015 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4016 switch (adev->asic_type) {
4017 case CHIP_BONAIRE:
4018 buffer[count++] = cpu_to_le32(0x16000012);
4019 buffer[count++] = cpu_to_le32(0x00000000);
4020 break;
4021 case CHIP_KAVERI:
4022 buffer[count++] = cpu_to_le32(0x00000000);
4023 buffer[count++] = cpu_to_le32(0x00000000);
4024 break;
4025 case CHIP_KABINI:
4026 case CHIP_MULLINS:
4027 buffer[count++] = cpu_to_le32(0x00000000);
4028 buffer[count++] = cpu_to_le32(0x00000000);
4029 break;
4030 case CHIP_HAWAII:
4031 buffer[count++] = cpu_to_le32(0x3a00161a);
4032 buffer[count++] = cpu_to_le32(0x0000002e);
4033 break;
4034 default:
4035 buffer[count++] = cpu_to_le32(0x00000000);
4036 buffer[count++] = cpu_to_le32(0x00000000);
4037 break;
4038 }
4039
4040 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4041 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
4042
4043 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
4044 buffer[count++] = cpu_to_le32(0);
4045}
4046
4047static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
4048{
4049 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4050 AMD_PG_SUPPORT_GFX_SMG |
4051 AMD_PG_SUPPORT_GFX_DMG |
4052 AMD_PG_SUPPORT_CP |
4053 AMD_PG_SUPPORT_GDS |
4054 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4055 gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
4056 gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
4057 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4058 gfx_v7_0_init_gfx_cgpg(adev);
4059 gfx_v7_0_enable_cp_pg(adev, true);
4060 gfx_v7_0_enable_gds_pg(adev, true);
4061 }
4062 gfx_v7_0_init_ao_cu_mask(adev);
4063 gfx_v7_0_update_gfx_pg(adev, true);
4064 }
4065}
4066
4067static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
4068{
4069 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4070 AMD_PG_SUPPORT_GFX_SMG |
4071 AMD_PG_SUPPORT_GFX_DMG |
4072 AMD_PG_SUPPORT_CP |
4073 AMD_PG_SUPPORT_GDS |
4074 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4075 gfx_v7_0_update_gfx_pg(adev, false);
4076 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4077 gfx_v7_0_enable_cp_pg(adev, false);
4078 gfx_v7_0_enable_gds_pg(adev, false);
4079 }
4080 }
4081}
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4092{
4093 uint64_t clock;
4094
4095 mutex_lock(&adev->gfx.gpu_clock_mutex);
4096 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4097 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4098 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4099 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4100 return clock;
4101}
4102
4103static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4104 uint32_t vmid,
4105 uint32_t gds_base, uint32_t gds_size,
4106 uint32_t gws_base, uint32_t gws_size,
4107 uint32_t oa_base, uint32_t oa_size)
4108{
4109
4110 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4111 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4112 WRITE_DATA_DST_SEL(0)));
4113 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4114 amdgpu_ring_write(ring, 0);
4115 amdgpu_ring_write(ring, gds_base);
4116
4117
4118 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4119 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4120 WRITE_DATA_DST_SEL(0)));
4121 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4122 amdgpu_ring_write(ring, 0);
4123 amdgpu_ring_write(ring, gds_size);
4124
4125
4126 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4127 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4128 WRITE_DATA_DST_SEL(0)));
4129 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4130 amdgpu_ring_write(ring, 0);
4131 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4132
4133
4134 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4135 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4136 WRITE_DATA_DST_SEL(0)));
4137 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4138 amdgpu_ring_write(ring, 0);
4139 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4140}
4141
4142static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4143{
4144 struct amdgpu_device *adev = ring->adev;
4145 uint32_t value = 0;
4146
4147 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4148 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4149 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4150 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4151 WREG32(mmSQ_CMD, value);
4152}
4153
4154static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
4155{
4156 WREG32(mmSQ_IND_INDEX,
4157 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4158 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4159 (address << SQ_IND_INDEX__INDEX__SHIFT) |
4160 (SQ_IND_INDEX__FORCE_READ_MASK));
4161 return RREG32(mmSQ_IND_DATA);
4162}
4163
4164static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
4165 uint32_t wave, uint32_t thread,
4166 uint32_t regno, uint32_t num, uint32_t *out)
4167{
4168 WREG32(mmSQ_IND_INDEX,
4169 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4170 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4171 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
4172 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
4173 (SQ_IND_INDEX__FORCE_READ_MASK) |
4174 (SQ_IND_INDEX__AUTO_INCR_MASK));
4175 while (num--)
4176 *(out++) = RREG32(mmSQ_IND_DATA);
4177}
4178
4179static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
4180{
4181
4182 dst[(*no_fields)++] = 0;
4183 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
4184 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
4185 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
4186 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
4187 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
4188 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
4189 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
4190 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
4191 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
4192 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
4193 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
4194 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
4195 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
4196 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
4197 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
4198 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
4199 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
4200 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
4201 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
4202}
4203
4204static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
4205 uint32_t wave, uint32_t start,
4206 uint32_t size, uint32_t *dst)
4207{
4208 wave_read_regs(
4209 adev, simd, wave, 0,
4210 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
4211}
4212
/*
 * gfx_v7_0_select_me_pipe_q - amdgpu_gfx_funcs adapter for cik_srbm_select()
 *
 * Forwards @me, @pipe, @q and @vm unchanged to cik_srbm_select(); it exists so
 * the selection can be reached through the gfx_v7_0_gfx_funcs table.
 */
static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q, u32 vm)
{
	cik_srbm_select(adev, me, pipe, q, vm);
}
4218
4219static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
4220 .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
4221 .select_se_sh = &gfx_v7_0_select_se_sh,
4222 .read_wave_data = &gfx_v7_0_read_wave_data,
4223 .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
4224 .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
4225};
4226
4227static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
4228 .is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
4229 .set_safe_mode = gfx_v7_0_set_safe_mode,
4230 .unset_safe_mode = gfx_v7_0_unset_safe_mode,
4231 .init = gfx_v7_0_rlc_init,
4232 .get_csb_size = gfx_v7_0_get_csb_size,
4233 .get_csb_buffer = gfx_v7_0_get_csb_buffer,
4234 .get_cp_table_num = gfx_v7_0_cp_pg_table_num,
4235 .resume = gfx_v7_0_rlc_resume,
4236 .stop = gfx_v7_0_rlc_stop,
4237 .reset = gfx_v7_0_rlc_reset,
4238 .start = gfx_v7_0_rlc_start,
4239 .update_spm_vmid = gfx_v7_0_update_spm_vmid
4240};
4241
4242static int gfx_v7_0_early_init(void *handle)
4243{
4244 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4245
4246 adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
4247 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4248 AMDGPU_MAX_COMPUTE_RINGS);
4249 adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
4250 adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
4251 gfx_v7_0_set_ring_funcs(adev);
4252 gfx_v7_0_set_irq_funcs(adev);
4253 gfx_v7_0_set_gds_init(adev);
4254
4255 return 0;
4256}
4257
4258static int gfx_v7_0_late_init(void *handle)
4259{
4260 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4261 int r;
4262
4263 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4264 if (r)
4265 return r;
4266
4267 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4268 if (r)
4269 return r;
4270
4271 return 0;
4272}
4273
4274static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4275{
4276 u32 gb_addr_config;
4277 u32 mc_arb_ramcfg;
4278 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
4279 u32 tmp;
4280
4281 switch (adev->asic_type) {
4282 case CHIP_BONAIRE:
4283 adev->gfx.config.max_shader_engines = 2;
4284 adev->gfx.config.max_tile_pipes = 4;
4285 adev->gfx.config.max_cu_per_sh = 7;
4286 adev->gfx.config.max_sh_per_se = 1;
4287 adev->gfx.config.max_backends_per_se = 2;
4288 adev->gfx.config.max_texture_channel_caches = 4;
4289 adev->gfx.config.max_gprs = 256;
4290 adev->gfx.config.max_gs_threads = 32;
4291 adev->gfx.config.max_hw_contexts = 8;
4292
4293 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4294 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4295 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4296 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4297 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4298 break;
4299 case CHIP_HAWAII:
4300 adev->gfx.config.max_shader_engines = 4;
4301 adev->gfx.config.max_tile_pipes = 16;
4302 adev->gfx.config.max_cu_per_sh = 11;
4303 adev->gfx.config.max_sh_per_se = 1;
4304 adev->gfx.config.max_backends_per_se = 4;
4305 adev->gfx.config.max_texture_channel_caches = 16;
4306 adev->gfx.config.max_gprs = 256;
4307 adev->gfx.config.max_gs_threads = 32;
4308 adev->gfx.config.max_hw_contexts = 8;
4309
4310 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4311 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4312 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4313 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4314 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
4315 break;
4316 case CHIP_KAVERI:
4317 adev->gfx.config.max_shader_engines = 1;
4318 adev->gfx.config.max_tile_pipes = 4;
4319 adev->gfx.config.max_cu_per_sh = 8;
4320 adev->gfx.config.max_backends_per_se = 2;
4321 adev->gfx.config.max_sh_per_se = 1;
4322 adev->gfx.config.max_texture_channel_caches = 4;
4323 adev->gfx.config.max_gprs = 256;
4324 adev->gfx.config.max_gs_threads = 16;
4325 adev->gfx.config.max_hw_contexts = 8;
4326
4327 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4328 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4329 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4330 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4331 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4332 break;
4333 case CHIP_KABINI:
4334 case CHIP_MULLINS:
4335 default:
4336 adev->gfx.config.max_shader_engines = 1;
4337 adev->gfx.config.max_tile_pipes = 2;
4338 adev->gfx.config.max_cu_per_sh = 2;
4339 adev->gfx.config.max_sh_per_se = 1;
4340 adev->gfx.config.max_backends_per_se = 1;
4341 adev->gfx.config.max_texture_channel_caches = 2;
4342 adev->gfx.config.max_gprs = 256;
4343 adev->gfx.config.max_gs_threads = 16;
4344 adev->gfx.config.max_hw_contexts = 8;
4345
4346 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4347 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4348 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4349 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4350 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4351 break;
4352 }
4353
4354 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
4355 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
4356
4357 adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
4358 MC_ARB_RAMCFG, NOOFBANK);
4359 adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
4360 MC_ARB_RAMCFG, NOOFRANKS);
4361
4362 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
4363 adev->gfx.config.mem_max_burst_length_bytes = 256;
4364 if (adev->flags & AMD_IS_APU) {
4365
4366 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
4367 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4368 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4369
4370 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
4371 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4372 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4373
4374
4375 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
4376 dimm00_addr_map = 0;
4377 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
4378 dimm01_addr_map = 0;
4379 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
4380 dimm10_addr_map = 0;
4381 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
4382 dimm11_addr_map = 0;
4383
4384
4385
4386 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
4387 adev->gfx.config.mem_row_size_in_kb = 2;
4388 else
4389 adev->gfx.config.mem_row_size_in_kb = 1;
4390 } else {
4391 tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
4392 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
4393 if (adev->gfx.config.mem_row_size_in_kb > 4)
4394 adev->gfx.config.mem_row_size_in_kb = 4;
4395 }
4396
4397 adev->gfx.config.shader_engine_tile_size = 32;
4398 adev->gfx.config.num_gpus = 1;
4399 adev->gfx.config.multi_gpu_tile_size = 64;
4400
4401
4402 gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
4403 switch (adev->gfx.config.mem_row_size_in_kb) {
4404 case 1:
4405 default:
4406 gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4407 break;
4408 case 2:
4409 gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4410 break;
4411 case 4:
4412 gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4413 break;
4414 }
4415 adev->gfx.config.gb_addr_config = gb_addr_config;
4416}
4417
4418static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
4419 int mec, int pipe, int queue)
4420{
4421 int r;
4422 unsigned irq_type;
4423 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
4424
4425
4426 ring->me = mec + 1;
4427 ring->pipe = pipe;
4428 ring->queue = queue;
4429
4430 ring->ring_obj = NULL;
4431 ring->use_doorbell = true;
4432 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
4433 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
4434
4435 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4436 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
4437 + ring->pipe;
4438
4439
4440 r = amdgpu_ring_init(adev, ring, 1024,
4441 &adev->gfx.eop_irq, irq_type,
4442 AMDGPU_RING_PRIO_DEFAULT, NULL);
4443 if (r)
4444 return r;
4445
4446
4447 return 0;
4448}
4449
4450static int gfx_v7_0_sw_init(void *handle)
4451{
4452 struct amdgpu_ring *ring;
4453 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4454 int i, j, k, r, ring_id;
4455
4456 switch (adev->asic_type) {
4457 case CHIP_KAVERI:
4458 adev->gfx.mec.num_mec = 2;
4459 break;
4460 case CHIP_BONAIRE:
4461 case CHIP_HAWAII:
4462 case CHIP_KABINI:
4463 case CHIP_MULLINS:
4464 default:
4465 adev->gfx.mec.num_mec = 1;
4466 break;
4467 }
4468 adev->gfx.mec.num_pipe_per_mec = 4;
4469 adev->gfx.mec.num_queue_per_pipe = 8;
4470
4471
4472 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
4473 if (r)
4474 return r;
4475
4476
4477 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
4478 &adev->gfx.priv_reg_irq);
4479 if (r)
4480 return r;
4481
4482
4483 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
4484 &adev->gfx.priv_inst_irq);
4485 if (r)
4486 return r;
4487
4488 gfx_v7_0_scratch_init(adev);
4489
4490 r = gfx_v7_0_init_microcode(adev);
4491 if (r) {
4492 DRM_ERROR("Failed to load gfx firmware!\n");
4493 return r;
4494 }
4495
4496 r = adev->gfx.rlc.funcs->init(adev);
4497 if (r) {
4498 DRM_ERROR("Failed to init rlc BOs!\n");
4499 return r;
4500 }
4501
4502
4503 r = gfx_v7_0_mec_init(adev);
4504 if (r) {
4505 DRM_ERROR("Failed to init MEC BOs!\n");
4506 return r;
4507 }
4508
4509 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4510 ring = &adev->gfx.gfx_ring[i];
4511 ring->ring_obj = NULL;
4512 sprintf(ring->name, "gfx");
4513 r = amdgpu_ring_init(adev, ring, 1024,
4514 &adev->gfx.eop_irq,
4515 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
4516 AMDGPU_RING_PRIO_DEFAULT, NULL);
4517 if (r)
4518 return r;
4519 }
4520
4521
4522 ring_id = 0;
4523 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4524 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4525 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4526 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
4527 continue;
4528
4529 r = gfx_v7_0_compute_ring_init(adev,
4530 ring_id,
4531 i, k, j);
4532 if (r)
4533 return r;
4534
4535 ring_id++;
4536 }
4537 }
4538 }
4539
4540 adev->gfx.ce_ram_size = 0x8000;
4541
4542 gfx_v7_0_gpu_early_init(adev);
4543
4544 return r;
4545}
4546
4547static int gfx_v7_0_sw_fini(void *handle)
4548{
4549 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4550 int i;
4551
4552 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4553 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
4554 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4555 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
4556
4557 gfx_v7_0_cp_compute_fini(adev);
4558 amdgpu_gfx_rlc_fini(adev);
4559 gfx_v7_0_mec_fini(adev);
4560 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
4561 &adev->gfx.rlc.clear_state_gpu_addr,
4562 (void **)&adev->gfx.rlc.cs_ptr);
4563 if (adev->gfx.rlc.cp_table_size) {
4564 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
4565 &adev->gfx.rlc.cp_table_gpu_addr,
4566 (void **)&adev->gfx.rlc.cp_table_ptr);
4567 }
4568 gfx_v7_0_free_microcode(adev);
4569
4570 return 0;
4571}
4572
4573static int gfx_v7_0_hw_init(void *handle)
4574{
4575 int r;
4576 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4577
4578 gfx_v7_0_constants_init(adev);
4579
4580
4581 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
4582
4583 r = adev->gfx.rlc.funcs->resume(adev);
4584 if (r)
4585 return r;
4586
4587 r = gfx_v7_0_cp_resume(adev);
4588 if (r)
4589 return r;
4590
4591 return r;
4592}
4593
4594static int gfx_v7_0_hw_fini(void *handle)
4595{
4596 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4597
4598 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4599 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4600 gfx_v7_0_cp_enable(adev, false);
4601 adev->gfx.rlc.funcs->stop(adev);
4602 gfx_v7_0_fini_pg(adev);
4603
4604 return 0;
4605}
4606
/*
 * gfx_v7_0_suspend - suspend hook; identical to a hardware teardown.
 *
 * @handle is the amdgpu device and is passed straight on to
 * gfx_v7_0_hw_fini(), whose return value is propagated.
 */
static int gfx_v7_0_suspend(void *handle)
{
	return gfx_v7_0_hw_fini(handle);
}
4613
/*
 * gfx_v7_0_resume - resume hook; identical to a hardware bring-up.
 *
 * @handle is the amdgpu device and is passed straight on to
 * gfx_v7_0_hw_init(), whose return value is propagated.
 */
static int gfx_v7_0_resume(void *handle)
{
	return gfx_v7_0_hw_init(handle);
}
4620
4621static bool gfx_v7_0_is_idle(void *handle)
4622{
4623 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4624
4625 if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4626 return false;
4627 else
4628 return true;
4629}
4630
4631static int gfx_v7_0_wait_for_idle(void *handle)
4632{
4633 unsigned i;
4634 u32 tmp;
4635 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4636
4637 for (i = 0; i < adev->usec_timeout; i++) {
4638
4639 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4640
4641 if (!tmp)
4642 return 0;
4643 udelay(1);
4644 }
4645 return -ETIMEDOUT;
4646}
4647
4648static int gfx_v7_0_soft_reset(void *handle)
4649{
4650 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4651 u32 tmp;
4652 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4653
4654
4655 tmp = RREG32(mmGRBM_STATUS);
4656 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4657 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4658 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4659 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4660 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4661 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
4662 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
4663 GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
4664
4665 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4666 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
4667 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
4668 }
4669
4670
4671 tmp = RREG32(mmGRBM_STATUS2);
4672 if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
4673 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
4674
4675
4676 tmp = RREG32(mmSRBM_STATUS);
4677 if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
4678 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
4679
4680 if (grbm_soft_reset || srbm_soft_reset) {
4681
4682 gfx_v7_0_fini_pg(adev);
4683 gfx_v7_0_update_cg(adev, false);
4684
4685
4686 adev->gfx.rlc.funcs->stop(adev);
4687
4688
4689 WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
4690
4691
4692 WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
4693
4694 if (grbm_soft_reset) {
4695 tmp = RREG32(mmGRBM_SOFT_RESET);
4696 tmp |= grbm_soft_reset;
4697 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4698 WREG32(mmGRBM_SOFT_RESET, tmp);
4699 tmp = RREG32(mmGRBM_SOFT_RESET);
4700
4701 udelay(50);
4702
4703 tmp &= ~grbm_soft_reset;
4704 WREG32(mmGRBM_SOFT_RESET, tmp);
4705 tmp = RREG32(mmGRBM_SOFT_RESET);
4706 }
4707
4708 if (srbm_soft_reset) {
4709 tmp = RREG32(mmSRBM_SOFT_RESET);
4710 tmp |= srbm_soft_reset;
4711 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4712 WREG32(mmSRBM_SOFT_RESET, tmp);
4713 tmp = RREG32(mmSRBM_SOFT_RESET);
4714
4715 udelay(50);
4716
4717 tmp &= ~srbm_soft_reset;
4718 WREG32(mmSRBM_SOFT_RESET, tmp);
4719 tmp = RREG32(mmSRBM_SOFT_RESET);
4720 }
4721
4722 udelay(50);
4723 }
4724 return 0;
4725}
4726
4727static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4728 enum amdgpu_interrupt_state state)
4729{
4730 u32 cp_int_cntl;
4731
4732 switch (state) {
4733 case AMDGPU_IRQ_STATE_DISABLE:
4734 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4735 cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4736 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4737 break;
4738 case AMDGPU_IRQ_STATE_ENABLE:
4739 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4740 cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4741 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4742 break;
4743 default:
4744 break;
4745 }
4746}
4747
4748static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4749 int me, int pipe,
4750 enum amdgpu_interrupt_state state)
4751{
4752 u32 mec_int_cntl, mec_int_cntl_reg;
4753
4754
4755
4756
4757
4758
4759
4760 if (me == 1) {
4761 switch (pipe) {
4762 case 0:
4763 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4764 break;
4765 case 1:
4766 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
4767 break;
4768 case 2:
4769 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
4770 break;
4771 case 3:
4772 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
4773 break;
4774 default:
4775 DRM_DEBUG("invalid pipe %d\n", pipe);
4776 return;
4777 }
4778 } else {
4779 DRM_DEBUG("invalid me %d\n", me);
4780 return;
4781 }
4782
4783 switch (state) {
4784 case AMDGPU_IRQ_STATE_DISABLE:
4785 mec_int_cntl = RREG32(mec_int_cntl_reg);
4786 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4787 WREG32(mec_int_cntl_reg, mec_int_cntl);
4788 break;
4789 case AMDGPU_IRQ_STATE_ENABLE:
4790 mec_int_cntl = RREG32(mec_int_cntl_reg);
4791 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4792 WREG32(mec_int_cntl_reg, mec_int_cntl);
4793 break;
4794 default:
4795 break;
4796 }
4797}
4798
4799static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4800 struct amdgpu_irq_src *src,
4801 unsigned type,
4802 enum amdgpu_interrupt_state state)
4803{
4804 u32 cp_int_cntl;
4805
4806 switch (state) {
4807 case AMDGPU_IRQ_STATE_DISABLE:
4808 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4809 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4810 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4811 break;
4812 case AMDGPU_IRQ_STATE_ENABLE:
4813 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4814 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4815 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4816 break;
4817 default:
4818 break;
4819 }
4820
4821 return 0;
4822}
4823
4824static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4825 struct amdgpu_irq_src *src,
4826 unsigned type,
4827 enum amdgpu_interrupt_state state)
4828{
4829 u32 cp_int_cntl;
4830
4831 switch (state) {
4832 case AMDGPU_IRQ_STATE_DISABLE:
4833 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4834 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4835 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4836 break;
4837 case AMDGPU_IRQ_STATE_ENABLE:
4838 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4839 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4840 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4841 break;
4842 default:
4843 break;
4844 }
4845
4846 return 0;
4847}
4848
4849static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4850 struct amdgpu_irq_src *src,
4851 unsigned type,
4852 enum amdgpu_interrupt_state state)
4853{
4854 switch (type) {
4855 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
4856 gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
4857 break;
4858 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4859 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4860 break;
4861 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4862 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4863 break;
4864 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4865 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4866 break;
4867 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4868 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4869 break;
4870 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4871 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4872 break;
4873 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4874 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4875 break;
4876 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4877 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4878 break;
4879 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4880 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4881 break;
4882 default:
4883 break;
4884 }
4885 return 0;
4886}
4887
4888static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
4889 struct amdgpu_irq_src *source,
4890 struct amdgpu_iv_entry *entry)
4891{
4892 u8 me_id, pipe_id;
4893 struct amdgpu_ring *ring;
4894 int i;
4895
4896 DRM_DEBUG("IH: CP EOP\n");
4897 me_id = (entry->ring_id & 0x0c) >> 2;
4898 pipe_id = (entry->ring_id & 0x03) >> 0;
4899 switch (me_id) {
4900 case 0:
4901 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4902 break;
4903 case 1:
4904 case 2:
4905 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4906 ring = &adev->gfx.compute_ring[i];
4907 if ((ring->me == me_id) && (ring->pipe == pipe_id))
4908 amdgpu_fence_process(ring);
4909 }
4910 break;
4911 }
4912 return 0;
4913}
4914
4915static void gfx_v7_0_fault(struct amdgpu_device *adev,
4916 struct amdgpu_iv_entry *entry)
4917{
4918 struct amdgpu_ring *ring;
4919 u8 me_id, pipe_id;
4920 int i;
4921
4922 me_id = (entry->ring_id & 0x0c) >> 2;
4923 pipe_id = (entry->ring_id & 0x03) >> 0;
4924 switch (me_id) {
4925 case 0:
4926 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
4927 break;
4928 case 1:
4929 case 2:
4930 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4931 ring = &adev->gfx.compute_ring[i];
4932 if ((ring->me == me_id) && (ring->pipe == pipe_id))
4933 drm_sched_fault(&ring->sched);
4934 }
4935 break;
4936 }
4937}
4938
/*
 * gfx_v7_0_priv_reg_irq - handler for the privileged-register interrupt
 *
 * Logs the violation and forwards @entry to gfx_v7_0_fault(), which notifies
 * the scheduler of the ring(s) identified by the entry.  @source is unused.
 * Always returns 0.
 */
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v7_0_fault(adev, entry);
	return 0;
}
4947
/*
 * gfx_v7_0_priv_inst_irq - handler for the privileged-instruction interrupt
 *
 * Logs the violation and forwards @entry to gfx_v7_0_fault(), which notifies
 * the scheduler of the ring(s) identified by the entry.  @source is unused.
 * Always returns 0.
 */
static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v7_0_fault(adev, entry);
	return 0;
}
4957
4958static int gfx_v7_0_set_clockgating_state(void *handle,
4959 enum amd_clockgating_state state)
4960{
4961 bool gate = false;
4962 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4963
4964 if (state == AMD_CG_STATE_GATE)
4965 gate = true;
4966
4967 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4968
4969 if (gate) {
4970 gfx_v7_0_enable_mgcg(adev, true);
4971 gfx_v7_0_enable_cgcg(adev, true);
4972 } else {
4973 gfx_v7_0_enable_cgcg(adev, false);
4974 gfx_v7_0_enable_mgcg(adev, false);
4975 }
4976 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4977
4978 return 0;
4979}
4980
4981static int gfx_v7_0_set_powergating_state(void *handle,
4982 enum amd_powergating_state state)
4983{
4984 bool gate = false;
4985 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4986
4987 if (state == AMD_PG_STATE_GATE)
4988 gate = true;
4989
4990 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4991 AMD_PG_SUPPORT_GFX_SMG |
4992 AMD_PG_SUPPORT_GFX_DMG |
4993 AMD_PG_SUPPORT_CP |
4994 AMD_PG_SUPPORT_GDS |
4995 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4996 gfx_v7_0_update_gfx_pg(adev, gate);
4997 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4998 gfx_v7_0_enable_cp_pg(adev, gate);
4999 gfx_v7_0_enable_gds_pg(adev, gate);
5000 }
5001 }
5002
5003 return 0;
5004}
5005
5006static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
5007{
5008 amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
5009 amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
5010 PACKET3_TC_ACTION_ENA |
5011 PACKET3_SH_KCACHE_ACTION_ENA |
5012 PACKET3_SH_ICACHE_ACTION_ENA);
5013 amdgpu_ring_write(ring, 0xffffffff);
5014 amdgpu_ring_write(ring, 0);
5015 amdgpu_ring_write(ring, 0x0000000A);
5016}
5017
5018static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
5019{
5020 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
5021 amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
5022 PACKET3_TC_ACTION_ENA |
5023 PACKET3_SH_KCACHE_ACTION_ENA |
5024 PACKET3_SH_ICACHE_ACTION_ENA);
5025 amdgpu_ring_write(ring, 0xffffffff);
5026 amdgpu_ring_write(ring, 0xff);
5027 amdgpu_ring_write(ring, 0);
5028 amdgpu_ring_write(ring, 0);
5029 amdgpu_ring_write(ring, 0x0000000A);
5030}
5031
/*
 * IP-block callback table for GFX v7.  It is shared by the three
 * gfx_v7_{1,2,3}_ip_block version descriptors at the end of this file.
 */
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
	.name = "gfx_v7_0",
	/* init / teardown */
	.early_init = gfx_v7_0_early_init,
	.late_init = gfx_v7_0_late_init,
	.sw_init = gfx_v7_0_sw_init,
	.sw_fini = gfx_v7_0_sw_fini,
	.hw_init = gfx_v7_0_hw_init,
	.hw_fini = gfx_v7_0_hw_fini,
	/* suspend/resume simply reuse hw_fini/hw_init */
	.suspend = gfx_v7_0_suspend,
	.resume = gfx_v7_0_resume,
	/* idle detection and reset */
	.is_idle = gfx_v7_0_is_idle,
	.wait_for_idle = gfx_v7_0_wait_for_idle,
	.soft_reset = gfx_v7_0_soft_reset,
	/* power management */
	.set_clockgating_state = gfx_v7_0_set_clockgating_state,
	.set_powergating_state = gfx_v7_0_set_powergating_state,
};
5048
/*
 * Ring callbacks for the graphics ring; attached to every gfx ring by
 * gfx_v7_0_set_ring_funcs().
 */
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v7_0_ring_get_rptr,
	.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
	/*
	 * Sum of per-packet dword budgets.  The final 5 matches the five
	 * dwords written by gfx_v7_0_emit_mem_sync().
	 * NOTE(review): the remaining terms presumably correspond to the
	 * other emit_* callbacks below - confirm each against its emitter
	 * before changing any of them.
	 */
	.emit_frame_size =
		20 +
		7 +
		5 +
		12 + 12 + 12 +
		7 + 4 +
		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 +
		3 + 4 +
		5,
	.emit_ib_size = 4,
	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
	.test_ring = gfx_v7_0_ring_test_ring,
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
	.soft_recovery = gfx_v7_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v7_0_emit_mem_sync,
};
5082
/*
 * Ring callbacks for compute rings; attached to every compute ring by
 * gfx_v7_0_set_ring_funcs().  Unlike the gfx table it has no emit_cntxcntl
 * or soft_recovery hook.
 */
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v7_0_ring_get_rptr,
	.get_wptr = gfx_v7_0_ring_get_wptr_compute,
	.set_wptr = gfx_v7_0_ring_set_wptr_compute,
	/*
	 * Sum of per-packet dword budgets.  The final 7 matches the seven
	 * dwords written by gfx_v7_0_emit_mem_sync_compute().
	 * NOTE(review): the remaining terms presumably correspond to the
	 * other emit_* callbacks below - confirm each against its emitter
	 * before changing any of them.
	 */
	.emit_frame_size =
		20 +
		7 +
		5 +
		7 +
		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 +
		7 + 7 + 7 +
		7,
	.emit_ib_size = 7,
	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
	.test_ring = gfx_v7_0_ring_test_ring,
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
	.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
5113
5114static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
5115{
5116 int i;
5117
5118 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5119 adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
5120 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5121 adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
5122}
5123
/* End-of-pipe interrupt source: state control plus the fence-processing handler. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
	.set = gfx_v7_0_set_eop_interrupt_state,
	.process = gfx_v7_0_eop_irq,
};
5128
/* Privileged-register fault interrupt source: state control plus handler. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
	.set = gfx_v7_0_set_priv_reg_fault_state,
	.process = gfx_v7_0_priv_reg_irq,
};
5133
/* Privileged-instruction fault interrupt source: state control plus handler. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
	.set = gfx_v7_0_set_priv_inst_fault_state,
	.process = gfx_v7_0_priv_inst_irq,
};
5138
5139static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
5140{
5141 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5142 adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
5143
5144 adev->gfx.priv_reg_irq.num_types = 1;
5145 adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
5146
5147 adev->gfx.priv_inst_irq.num_types = 1;
5148 adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
5149}
5150
5151static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5152{
5153
5154 adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
5155 adev->gds.gws_size = 64;
5156 adev->gds.oa_size = 16;
5157 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
5158}
5159
5160
5161static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
5162{
5163 int i, j, k, counter, active_cu_number = 0;
5164 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5165 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
5166 unsigned disable_masks[4 * 2];
5167 u32 ao_cu_num;
5168
5169 if (adev->flags & AMD_IS_APU)
5170 ao_cu_num = 2;
5171 else
5172 ao_cu_num = adev->gfx.config.max_cu_per_sh;
5173
5174 memset(cu_info, 0, sizeof(*cu_info));
5175
5176 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5177
5178 mutex_lock(&adev->grbm_idx_mutex);
5179 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5180 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5181 mask = 1;
5182 ao_bitmap = 0;
5183 counter = 0;
5184 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
5185 if (i < 4 && j < 2)
5186 gfx_v7_0_set_user_cu_inactive_bitmap(
5187 adev, disable_masks[i * 2 + j]);
5188 bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
5189 cu_info->bitmap[i][j] = bitmap;
5190
5191 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5192 if (bitmap & mask) {
5193 if (counter < ao_cu_num)
5194 ao_bitmap |= mask;
5195 counter ++;
5196 }
5197 mask <<= 1;
5198 }
5199 active_cu_number += counter;
5200 if (i < 2 && j < 2)
5201 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5202 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5203 }
5204 }
5205 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5206 mutex_unlock(&adev->grbm_idx_mutex);
5207
5208 cu_info->number = active_cu_number;
5209 cu_info->ao_cu_mask = ao_cu_mask;
5210 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5211 cu_info->max_waves_per_simd = 10;
5212 cu_info->max_scratch_slots_per_cu = 32;
5213 cu_info->wave_front_size = 64;
5214 cu_info->lds_size = 64;
5215}
5216
/* GFX IP block descriptor for version 7.1.0; uses the shared gfx_v7_0 callbacks. */
const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5225
/* GFX IP block descriptor for version 7.2.0; uses the shared gfx_v7_0 callbacks. */
const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 2,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5234
/* GFX IP block descriptor for version 7.3.0; uses the shared gfx_v7_0 callbacks. */
const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 3,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5243