1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/firmware.h>
25#include <linux/module.h>
26
27#include "amdgpu.h"
28#include "amdgpu_ih.h"
29#include "amdgpu_gfx.h"
30#include "cikd.h"
31#include "cik.h"
32#include "cik_structs.h"
33#include "atom.h"
34#include "amdgpu_ucode.h"
35#include "clearstate_ci.h"
36
37#include "dce/dce_8_0_d.h"
38#include "dce/dce_8_0_sh_mask.h"
39
40#include "bif/bif_4_1_d.h"
41#include "bif/bif_4_1_sh_mask.h"
42
43#include "gca/gfx_7_0_d.h"
44#include "gca/gfx_7_2_enum.h"
45#include "gca/gfx_7_2_sh_mask.h"
46
47#include "gmc/gmc_7_0_d.h"
48#include "gmc/gmc_7_0_sh_mask.h"
49
50#include "oss/oss_2_0_d.h"
51#include "oss/oss_2_0_sh_mask.h"
52
53#define NUM_SIMD_PER_CU 0x4
54
55#define GFX7_NUM_GFX_RINGS 1
56#define GFX7_MEC_HPD_SIZE 2048
57
58static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
59static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
60static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
61
/*
 * Firmware images required per supported CIK ASIC: prefetch parser (pfp),
 * micro engine (me), constant engine (ce), RLC and compute MEC.  Only
 * Kaveri ships a second MEC image (mec2).  These declarations let
 * userspace tooling (e.g. initramfs generators) discover the files.
 */
MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");

MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");

MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");

MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
MODULE_FIRMWARE("amdgpu/kabini_me.bin");
MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
MODULE_FIRMWARE("amdgpu/kabini_mec.bin");

MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
MODULE_FIRMWARE("amdgpu/mullins_me.bin");
MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
92
/*
 * Per-VMID GDS register offsets, indexed by VMID 0..15.  Each entry holds
 * the mmGDS_VMIDn_BASE/SIZE pair plus the GWS and OA registers for that
 * VMID, so callers can program GDS state by VMID without a switch.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
112
/*
 * RLC save/restore register list for Spectre (Kaveri-class) parts,
 * consumed by the RLC microcode for GPU power-gating save/restore.
 *
 * Format (as visible here): pairs of
 *   ((instance/SE GRBM select) << 16) | (register byte offset >> 2)
 * each followed by a 0x00000000 placeholder word.  Selector 0x0e00
 * broadcasts; 0x4e00..0xbe00 address individual instances.
 * NOTE(review): the bare 0x3 / 0x5 words appear to be section markers
 * or counts interpreted by the RLC ucode (the trailing 0x5 section has
 * no placeholder words) -- confirm against the RLC ucode format before
 * editing.  Values must not be reordered or altered.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
559
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins-class)
 * parts.  Same encoding as spectre_rlc_save_restore_register_list:
 * ((instance/SE GRBM select) << 16) | (register byte offset >> 2),
 * each followed by a 0x00000000 placeholder.  The smaller parts expose
 * fewer instances, hence the shorter per-instance runs (0x4e00..0x7e00).
 * NOTE(review): bare 0x3 / 0x5 words look like RLC-ucode section
 * markers -- confirm before editing.  Values must not be reordered.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
884
885static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
886static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
887static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
888static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
889
890
891
892
893
894
895
896
897
898
899
900
901
902static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
903{
904 const char *chip_name;
905 char fw_name[30];
906 int err;
907
908 DRM_DEBUG("\n");
909
910 switch (adev->asic_type) {
911 case CHIP_BONAIRE:
912 chip_name = "bonaire";
913 break;
914 case CHIP_HAWAII:
915 chip_name = "hawaii";
916 break;
917 case CHIP_KAVERI:
918 chip_name = "kaveri";
919 break;
920 case CHIP_KABINI:
921 chip_name = "kabini";
922 break;
923 case CHIP_MULLINS:
924 chip_name = "mullins";
925 break;
926 default: BUG();
927 }
928
929 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
930 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
931 if (err)
932 goto out;
933 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
934 if (err)
935 goto out;
936
937 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
938 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
939 if (err)
940 goto out;
941 err = amdgpu_ucode_validate(adev->gfx.me_fw);
942 if (err)
943 goto out;
944
945 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
946 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
947 if (err)
948 goto out;
949 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
950 if (err)
951 goto out;
952
953 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
954 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
955 if (err)
956 goto out;
957 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
958 if (err)
959 goto out;
960
961 if (adev->asic_type == CHIP_KAVERI) {
962 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
963 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
964 if (err)
965 goto out;
966 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
967 if (err)
968 goto out;
969 }
970
971 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
972 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
973 if (err)
974 goto out;
975 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
976
977out:
978 if (err) {
979 pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
980 release_firmware(adev->gfx.pfp_fw);
981 adev->gfx.pfp_fw = NULL;
982 release_firmware(adev->gfx.me_fw);
983 adev->gfx.me_fw = NULL;
984 release_firmware(adev->gfx.ce_fw);
985 adev->gfx.ce_fw = NULL;
986 release_firmware(adev->gfx.mec_fw);
987 adev->gfx.mec_fw = NULL;
988 release_firmware(adev->gfx.mec2_fw);
989 adev->gfx.mec2_fw = NULL;
990 release_firmware(adev->gfx.rlc_fw);
991 adev->gfx.rlc_fw = NULL;
992 }
993 return err;
994}
995
996static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
997{
998 release_firmware(adev->gfx.pfp_fw);
999 adev->gfx.pfp_fw = NULL;
1000 release_firmware(adev->gfx.me_fw);
1001 adev->gfx.me_fw = NULL;
1002 release_firmware(adev->gfx.ce_fw);
1003 adev->gfx.ce_fw = NULL;
1004 release_firmware(adev->gfx.mec_fw);
1005 adev->gfx.mec_fw = NULL;
1006 release_firmware(adev->gfx.mec2_fw);
1007 adev->gfx.mec2_fw = NULL;
1008 release_firmware(adev->gfx.rlc_fw);
1009 adev->gfx.rlc_fw = NULL;
1010}
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1024{
1025 const u32 num_tile_mode_states =
1026 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1027 const u32 num_secondary_tile_mode_states =
1028 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1029 u32 reg_offset, split_equal_to_row_size;
1030 uint32_t *tile, *macrotile;
1031
1032 tile = adev->gfx.config.tile_mode_array;
1033 macrotile = adev->gfx.config.macrotile_mode_array;
1034
1035 switch (adev->gfx.config.mem_row_size_in_kb) {
1036 case 1:
1037 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1038 break;
1039 case 2:
1040 default:
1041 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1042 break;
1043 case 4:
1044 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1045 break;
1046 }
1047
1048 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1049 tile[reg_offset] = 0;
1050 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1051 macrotile[reg_offset] = 0;
1052
1053 switch (adev->asic_type) {
1054 case CHIP_BONAIRE:
1055 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1056 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1057 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1058 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1059 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1060 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1061 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1063 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1064 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1065 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1067 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1068 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1069 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1070 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1071 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1072 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1073 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1074 TILE_SPLIT(split_equal_to_row_size));
1075 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1076 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1077 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1078 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1079 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1080 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1081 TILE_SPLIT(split_equal_to_row_size));
1082 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1083 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1084 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1085 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1087 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1088 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1089 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1090 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1092 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1093 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1094 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1096 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1097 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1098 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1100 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1101 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1104 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1105 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1106 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1108 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1109 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1110 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1112 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1113 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1114 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1115 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1117 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1118 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1119 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1120 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1121 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1122 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1124 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1125 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1126 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1128 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1129 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1130 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1132 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1133 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1134 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1135 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1137 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1138 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1139 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1141 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1142 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1145 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1146 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1148 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1149 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1150 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1152 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1156 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1157
1158 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1161 NUM_BANKS(ADDR_SURF_16_BANK));
1162 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1165 NUM_BANKS(ADDR_SURF_16_BANK));
1166 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1169 NUM_BANKS(ADDR_SURF_16_BANK));
1170 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1173 NUM_BANKS(ADDR_SURF_16_BANK));
1174 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1177 NUM_BANKS(ADDR_SURF_16_BANK));
1178 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1181 NUM_BANKS(ADDR_SURF_8_BANK));
1182 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1185 NUM_BANKS(ADDR_SURF_4_BANK));
1186 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1189 NUM_BANKS(ADDR_SURF_16_BANK));
1190 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1193 NUM_BANKS(ADDR_SURF_16_BANK));
1194 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1197 NUM_BANKS(ADDR_SURF_16_BANK));
1198 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1201 NUM_BANKS(ADDR_SURF_16_BANK));
1202 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1205 NUM_BANKS(ADDR_SURF_16_BANK));
1206 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1209 NUM_BANKS(ADDR_SURF_8_BANK));
1210 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1213 NUM_BANKS(ADDR_SURF_4_BANK));
1214
1215 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1216 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1217 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1218 if (reg_offset != 7)
1219 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1220 break;
1221 case CHIP_HAWAII:
1222 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1223 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1226 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1227 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1230 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1231 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1234 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1235 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1238 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1239 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1241 TILE_SPLIT(split_equal_to_row_size));
1242 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1243 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1245 TILE_SPLIT(split_equal_to_row_size));
1246 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1247 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1249 TILE_SPLIT(split_equal_to_row_size));
1250 tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1253 TILE_SPLIT(split_equal_to_row_size));
1254 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1256 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1257 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1258 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1259 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1260 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1261 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1263 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1264 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1267 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1268 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1269 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1271 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1272 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1274 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1275 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1278 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1282 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1283 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1286 tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1287 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1290 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1294 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1297 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1298 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1299 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1301 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1302 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1303 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1305 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1307 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1309 tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1310 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1311 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1313 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1317 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1319 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1321 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1325 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1327 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1328 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1330 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1332 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1334 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1336 tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1337 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1340
1341 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1344 NUM_BANKS(ADDR_SURF_16_BANK));
1345 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1348 NUM_BANKS(ADDR_SURF_16_BANK));
1349 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1352 NUM_BANKS(ADDR_SURF_16_BANK));
1353 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1356 NUM_BANKS(ADDR_SURF_16_BANK));
1357 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1360 NUM_BANKS(ADDR_SURF_8_BANK));
1361 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1364 NUM_BANKS(ADDR_SURF_4_BANK));
1365 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1368 NUM_BANKS(ADDR_SURF_4_BANK));
1369 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1372 NUM_BANKS(ADDR_SURF_16_BANK));
1373 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1376 NUM_BANKS(ADDR_SURF_16_BANK));
1377 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1380 NUM_BANKS(ADDR_SURF_16_BANK));
1381 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1384 NUM_BANKS(ADDR_SURF_8_BANK));
1385 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1388 NUM_BANKS(ADDR_SURF_16_BANK));
1389 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1392 NUM_BANKS(ADDR_SURF_8_BANK));
1393 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1396 NUM_BANKS(ADDR_SURF_4_BANK));
1397
1398 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1399 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1400 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1401 if (reg_offset != 7)
1402 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1403 break;
1404 case CHIP_KABINI:
1405 case CHIP_KAVERI:
1406 case CHIP_MULLINS:
1407 default:
1408 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1409 PIPE_CONFIG(ADDR_SURF_P2) |
1410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1412 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1413 PIPE_CONFIG(ADDR_SURF_P2) |
1414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1416 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1417 PIPE_CONFIG(ADDR_SURF_P2) |
1418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1420 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1421 PIPE_CONFIG(ADDR_SURF_P2) |
1422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1424 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1425 PIPE_CONFIG(ADDR_SURF_P2) |
1426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1427 TILE_SPLIT(split_equal_to_row_size));
1428 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1429 PIPE_CONFIG(ADDR_SURF_P2) |
1430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1431 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1432 PIPE_CONFIG(ADDR_SURF_P2) |
1433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1434 TILE_SPLIT(split_equal_to_row_size));
1435 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1436 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1437 PIPE_CONFIG(ADDR_SURF_P2));
1438 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1439 PIPE_CONFIG(ADDR_SURF_P2) |
1440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1441 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1442 PIPE_CONFIG(ADDR_SURF_P2) |
1443 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1445 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1446 PIPE_CONFIG(ADDR_SURF_P2) |
1447 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1449 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1450 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1451 PIPE_CONFIG(ADDR_SURF_P2) |
1452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1453 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1454 PIPE_CONFIG(ADDR_SURF_P2) |
1455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1457 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1458 PIPE_CONFIG(ADDR_SURF_P2) |
1459 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1461 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1462 PIPE_CONFIG(ADDR_SURF_P2) |
1463 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1465 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1466 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1467 PIPE_CONFIG(ADDR_SURF_P2) |
1468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1470 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1471 PIPE_CONFIG(ADDR_SURF_P2) |
1472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1473 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1474 PIPE_CONFIG(ADDR_SURF_P2) |
1475 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1477 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1478 PIPE_CONFIG(ADDR_SURF_P2) |
1479 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1481 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1482 PIPE_CONFIG(ADDR_SURF_P2) |
1483 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1485 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1486 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1487 PIPE_CONFIG(ADDR_SURF_P2) |
1488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1490 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1491 PIPE_CONFIG(ADDR_SURF_P2) |
1492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1494 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1495 PIPE_CONFIG(ADDR_SURF_P2) |
1496 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1498 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1499 PIPE_CONFIG(ADDR_SURF_P2) |
1500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1501 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1502 PIPE_CONFIG(ADDR_SURF_P2) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1505 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1506 PIPE_CONFIG(ADDR_SURF_P2) |
1507 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1509 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1510
1511 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1514 NUM_BANKS(ADDR_SURF_8_BANK));
1515 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1518 NUM_BANKS(ADDR_SURF_8_BANK));
1519 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1522 NUM_BANKS(ADDR_SURF_8_BANK));
1523 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1526 NUM_BANKS(ADDR_SURF_8_BANK));
1527 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1530 NUM_BANKS(ADDR_SURF_8_BANK));
1531 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1534 NUM_BANKS(ADDR_SURF_8_BANK));
1535 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1538 NUM_BANKS(ADDR_SURF_8_BANK));
1539 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1542 NUM_BANKS(ADDR_SURF_16_BANK));
1543 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1546 NUM_BANKS(ADDR_SURF_16_BANK));
1547 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1550 NUM_BANKS(ADDR_SURF_16_BANK));
1551 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1554 NUM_BANKS(ADDR_SURF_16_BANK));
1555 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1558 NUM_BANKS(ADDR_SURF_16_BANK));
1559 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1562 NUM_BANKS(ADDR_SURF_16_BANK));
1563 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1566 NUM_BANKS(ADDR_SURF_8_BANK));
1567
1568 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1569 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1570 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1571 if (reg_offset != 7)
1572 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1573 break;
1574 }
1575}
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
1589 u32 se_num, u32 sh_num, u32 instance)
1590{
1591 u32 data;
1592
1593 if (instance == 0xffffffff)
1594 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1595 else
1596 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1597
1598 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1599 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1600 GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1601 else if (se_num == 0xffffffff)
1602 data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1603 (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1604 else if (sh_num == 0xffffffff)
1605 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1606 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1607 else
1608 data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1609 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1610 WREG32(mmGRBM_GFX_INDEX, data);
1611}
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1622{
1623 u32 data, mask;
1624
1625 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1626 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1627
1628 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1629 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1630
1631 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1632 adev->gfx.config.max_sh_per_se);
1633
1634 return (~data) & mask;
1635}
1636
1637static void
1638gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
1639{
1640 switch (adev->asic_type) {
1641 case CHIP_BONAIRE:
1642 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
1643 SE_XSEL(1) | SE_YSEL(1);
1644 *rconf1 |= 0x0;
1645 break;
1646 case CHIP_HAWAII:
1647 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
1648 RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
1649 PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
1650 SE_YSEL(3);
1651 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
1652 SE_PAIR_YSEL(2);
1653 break;
1654 case CHIP_KAVERI:
1655 *rconf |= RB_MAP_PKR0(2);
1656 *rconf1 |= 0x0;
1657 break;
1658 case CHIP_KABINI:
1659 case CHIP_MULLINS:
1660 *rconf |= 0x0;
1661 *rconf1 |= 0x0;
1662 break;
1663 default:
1664 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
1665 break;
1666 }
1667}
1668
/*
 * gfx_v7_0_write_harvested_raster_configs - program per-SE raster configs
 * around harvested render backends
 *
 * @adev: amdgpu device pointer
 * @raster_config: default PA_SC_RASTER_CONFIG value for this ASIC
 * @raster_config_1: default PA_SC_RASTER_CONFIG_1 value for this ASIC
 * @rb_mask: bitmask of the render backends that are actually enabled
 * @num_rb: number of RB slots before harvesting
 *
 * When some RBs are fused off, the default SE/PKR/RB mappings may point
 * at disabled backends. This patches the mapping fields per shader
 * engine so every mapping targets an enabled RB, and writes the result
 * through a per-SE GRBM_GFX_INDEX selection.
 */
static void
gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into one contiguous enabled-RB mask per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* The patching logic below only handles these topologies. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/*
	 * With more than two SEs, redirect the SE-pair mapping away from a
	 * pair whose RBs are all harvested.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the first SE of the pair containing this SE. */
		int idx = (se / 2) * 2;

		/* If one SE of the pair is fully harvested, map to the other. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* Same redirection at packer granularity. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* Patch the RB mapping inside packer 0. */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* Patch the RB mapping inside packer 1. */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* Write the patched values for this SE only. */
		gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* Restore broadcast mode for subsequent register accesses. */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
/*
 * gfx_v7_0_setup_rb - configure the render backends
 *
 * Collects the active-RB bitmap across every SE/SH, records it in the
 * gfx config, then programs PA_SC_RASTER_CONFIG{,_1}: globally when no
 * RBs are harvested (or none are enabled), per-SE otherwise. Finally
 * caches the per-SE/SH RB register state for later queries.
 */
static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
		adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Gather each SH's active-RB bits into one device-wide bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v7_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);

	/*
	 * If nothing is enabled, or all RB slots are active (no
	 * harvesting), the ASIC defaults can be written in broadcast
	 * mode; otherwise patch them per SE.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* Cache what got programmed so it can be reported later. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852#define DEFAULT_SH_MEM_BASES (0x6000)
1853#define FIRST_COMPUTE_VMID (8)
1854#define LAST_COMPUTE_VMID (16)
/*
 * gfx_v7_0_init_compute_vmid - initialize SH_MEM state for compute VMIDs
 *
 * Programs identical SH_MEM_* settings (unaligned alignment mode,
 * non-cached default MTYPE, DEFAULT_SH_MEM_BASES aperture bases) into
 * every compute VMID (FIRST_COMPUTE_VMID .. LAST_COMPUTE_VMID - 1) via
 * SRBM banking, then clears their GDS allocations.
 */
static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * The same base value goes into both halves of SH_MEM_BASES
	 * (low 16 bits and bits 16..31), so both apertures described by
	 * that register share DEFAULT_SH_MEM_BASES.
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
	sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
	sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		/* Bank the SH_MEM registers to this VMID. */
		cik_srbm_select(adev, 0, 0, 0, i);
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base (1) > limit (0): aperture effectively unused. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Back to VMID 0 so later register accesses are unbanked. */
	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/*
	 * Clear GDS memory, GWS and OA allocations for the compute VMIDs
	 * so no stale partitioning is carried over.
	 */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
1892
1893static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
1894{
1895 int vmid;
1896
1897
1898
1899
1900
1901
1902
1903 for (vmid = 1; vmid < 16; vmid++) {
1904 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
1905 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
1906 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
1907 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
1908 }
1909}
1910
/*
 * gfx_v7_0_config_init - set software-side gfx configuration defaults
 *
 * Only updates driver state; performs no hardware access.
 */
static void gfx_v7_0_config_init(struct amdgpu_device *adev)
{
	adev->gfx.config.double_offchip_lds_buf = 1;
}
1915
1916
1917
1918
1919
1920
1921
1922
1923
/**
 * gfx_v7_0_constants_init - program static GFX register state
 *
 * @adev: amdgpu_device pointer
 *
 * One-time hardware init: address config, tiling tables, RB/CU setup,
 * per-VMID shared-memory apertures and a set of fixed debug/fifo/cache
 * register values.  The write order follows the hardware programming
 * sequence and should not be rearranged.
 */
static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
{
	u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
	u32 tmp;
	int i;

	/* 0xff GRBM read timeout */
	WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));

	/* mirror the computed gb_addr_config into all address-config users */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v7_0_tiling_mode_table_init(adev);

	gfx_v7_0_setup_rb(adev);
	gfx_v7_0_get_cu_info(adev);
	gfx_v7_0_config_init(adev);

	/* micro-engine queue thresholds */
	WREG32(mmCP_MEQ_THRESHOLDS,
	       (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
	       (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));

	mutex_lock(&adev->grbm_idx_mutex);

	/* broadcast the following writes to all SEs/SHs/instances */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/* shared memory config: unaligned access, NC default, UC for APE1 */
	sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
				   SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
				   MTYPE_NC);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
				   MTYPE_UC);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);

	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	/* program the aperture registers for every VMID; VMID 0 (kernel)
	 * gets base 0, all others use the shared aperture start */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		if (i == 0)
			sh_mem_base = 0;
		else
			sh_mem_base = adev->gmc.shared_aperture_start >> 48;
		cik_srbm_select(adev, 0, 0, 0, i);

		WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
		/* APE1 base (1) > limit (0): presumably leaves the APE1
		 * aperture empty/disabled — confirm against register spec */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_base);
	}
	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v7_0_init_compute_vmid(adev);
	gfx_v7_0_init_gds_vmid(adev);

	WREG32(mmSX_DEBUG_1, 0x20);

	WREG32(mmTA_CNTL_AUX, 0x00010000);

	tmp = RREG32(mmSPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(mmSPI_CONFIG_CNTL, tmp);

	WREG32(mmSQ_CONFIG, 1);

	WREG32(mmDB_DEBUG, 0);

	tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(mmDB_DEBUG2, tmp);

	tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(mmDB_DEBUG3, tmp);

	tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(mmCB_HW_CONTROL, tmp);

	WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));

	/* primitive/tile fifo sizes discovered during gpu-info setup */
	WREG32(mmPA_SC_FIFO_SIZE,
	       ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));

	WREG32(mmVGT_NUM_INSTANCES, 1);

	WREG32(mmCP_PERFMON_CNTL, 0);

	WREG32(mmSQ_CONFIG, 0);

	WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
	       ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
		(255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));

	WREG32(mmVGT_CACHE_INVALIDATION,
	       (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
	       (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));

	WREG32(mmVGT_GS_VERTEX_REUSE, 16);
	WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
	       (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
	WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);

	/* equal arbitration priority for all pipe order timestamps */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

	udelay(50);
}
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
/* Set up the driver-managed scratch register pool (8 regs, all free). */
static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	/* one bit per register in the free mask */
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2088{
2089 struct amdgpu_device *adev = ring->adev;
2090 uint32_t scratch;
2091 uint32_t tmp = 0;
2092 unsigned i;
2093 int r;
2094
2095 r = amdgpu_gfx_scratch_get(adev, &scratch);
2096 if (r)
2097 return r;
2098
2099 WREG32(scratch, 0xCAFEDEAD);
2100 r = amdgpu_ring_alloc(ring, 3);
2101 if (r)
2102 goto error_free_scratch;
2103
2104 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2105 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2106 amdgpu_ring_write(ring, 0xDEADBEEF);
2107 amdgpu_ring_commit(ring);
2108
2109 for (i = 0; i < adev->usec_timeout; i++) {
2110 tmp = RREG32(scratch);
2111 if (tmp == 0xDEADBEEF)
2112 break;
2113 udelay(1);
2114 }
2115 if (i >= adev->usec_timeout)
2116 r = -ETIMEDOUT;
2117
2118error_free_scratch:
2119 amdgpu_gfx_scratch_free(adev, scratch);
2120 return r;
2121}
2122
2123
2124
2125
2126
2127
2128
2129
2130
/*
 * Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for the
 * matching done bit.  The ref/mask bit depends on which CP (gfx, or which
 * compute ME/pipe) is emitting the flush.
 */
static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask;
	/* gfx rings wait on the PFP engine, compute on the ME engine */
	int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			/* ME1: CP2..CP5 bits, selected by pipe */
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			/* ME2: CP6..CP9 bits, selected by pipe */
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* unknown ME: nothing sensible to emit */
			return;
		}
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, then wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp)));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
2161
/* Emit a VS partial flush followed by a VGT flush event. */
static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
			 EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
			 EVENT_INDEX(0));
}
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
/**
 * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * @ring:  ring to emit on
 * @addr:  GPU address to write the fence sequence number to
 * @seq:   sequence number
 * @flags: AMDGPU_FENCE_FLAG_* (64-bit write, interrupt request)
 *
 * Emits an EOP event that flushes caches and writes @seq to @addr,
 * optionally raising an interrupt.
 */
static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* The EOP is emitted twice: the first packet writes seq - 1 with
	 * interrupts disabled, the second writes the real seq with the
	 * requested data/int selects.  NOTE(review): this looks like a CP
	 * workaround (duplicate EOP to ensure the write lands) — confirm
	 * against the CP errata before changing. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			 DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* second (real) fence write */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
/**
 * gfx_v7_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * @ring:  ring to emit on
 * @addr:  GPU address to write the fence sequence number to
 * @seq:   sequence number
 * @flags: AMDGPU_FENCE_FLAG_* (64-bit write, interrupt request)
 *
 * Compute rings use RELEASE_MEM instead of EVENT_WRITE_EOP; caches are
 * flushed and @seq is written to @addr, optionally with an interrupt.
 */
static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
/**
 * gfx_v7_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 *
 * @ring:  ring to emit on
 * @job:   job the IB belongs to (supplies the VMID), may be NULL
 * @ib:    indirect buffer to execute
 * @flags: AMDGPU_HAVE_CTX_SWITCH et al.
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for CE IBs) packet
 * pointing at the IB, tagged with the job's VMID.
 */
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_job *job,
				      struct amdgpu_ib *ib,
				      uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	/* on a context switch, insert a SWITCH_BUFFER packet first */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords, VMID in bits 31:24 */
	control |= ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
2288
/**
 * gfx_v7_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 *
 * @ring:  ring to emit on
 * @job:   job the IB belongs to (supplies the VMID), may be NULL
 * @ib:    indirect buffer to execute
 * @flags: IB emit flags (unused here)
 *
 * Optionally resets GDS_COMPUTE_MAX_WAVE_ID first, then emits the
 * INDIRECT_BUFFER packet tagged with the job's VMID.
 */
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* restore GDS_COMPUTE_MAX_WAVE_ID to its default when the IB
	 * requests it (e.g. after a user queue changed it) */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
2322
/*
 * Emit a CONTEXT_CONTROL packet.  On a context switch this is preceded by
 * a VGT flush and the load/shadow enable bits are set in dw2; the exact
 * bit meanings are CP-packet-defined (see the PM4 spec).
 */
static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable_mask bit */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v7_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs */
		dw2 |= 0x10002;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
/**
 * gfx_v7_0_ring_test_ib - IB execution test
 *
 * @ring:    ring to test
 * @timeout: fence wait timeout in jiffies
 *
 * Builds a 3-dword IB that writes 0xDEADBEEF to a scratch register,
 * schedules it and waits for the fence; checks that the value arrived.
 * Returns 0 on success, negative error code on failure/timeout.
 */
static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	/* seed the register so we can tell when the IB has run */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		/* fence never signaled */
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2434{
2435 if (enable)
2436 WREG32(mmCP_ME_CNTL, 0);
2437 else
2438 WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK |
2439 CP_ME_CNTL__PFP_HALT_MASK |
2440 CP_ME_CNTL__CE_HALT_MASK));
2441 udelay(50);
2442}
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2453{
2454 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2455 const struct gfx_firmware_header_v1_0 *ce_hdr;
2456 const struct gfx_firmware_header_v1_0 *me_hdr;
2457 const __le32 *fw_data;
2458 unsigned i, fw_size;
2459
2460 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2461 return -EINVAL;
2462
2463 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
2464 ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
2465 me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
2466
2467 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2468 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2469 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2470 adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
2471 adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
2472 adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
2473 adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
2474 adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
2475 adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
2476
2477 gfx_v7_0_cp_gfx_enable(adev, false);
2478
2479
2480 fw_data = (const __le32 *)
2481 (adev->gfx.pfp_fw->data +
2482 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2483 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2484 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2485 for (i = 0; i < fw_size; i++)
2486 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2487 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2488
2489
2490 fw_data = (const __le32 *)
2491 (adev->gfx.ce_fw->data +
2492 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2493 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2494 WREG32(mmCP_CE_UCODE_ADDR, 0);
2495 for (i = 0; i < fw_size; i++)
2496 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2497 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2498
2499
2500 fw_data = (const __le32 *)
2501 (adev->gfx.me_fw->data +
2502 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2503 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2504 WREG32(mmCP_ME_RAM_WADDR, 0);
2505 for (i = 0; i < fw_size; i++)
2506 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2507 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2508
2509 return 0;
2510}
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
/**
 * gfx_v7_0_cp_gfx_start - start the gfx ring
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the gfx CP and emits the initial clear-state / context
 * preamble on ring 0 (CE partition bases, clear state from the RLC
 * cs_data table, raster config, CLEAR_STATE).
 * Returns 0 on success, negative error code if the ring can't be locked.
 */
static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v7_0_cp_gfx_enable(adev, true);

	/* csb size plus 8 dwords of fixed preamble/postamble below */
	r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* set the CE partition bases */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	/* clear state preamble */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit the clear-state context registers from the RLC table */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
						  PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config from the harvested RB configuration */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* context register 0x316 plus two values — magic init, kept as-is */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, 0x00000316);
	amdgpu_ring_write(ring, 0x0000000e);
	amdgpu_ring_write(ring, 0x00000010);

	amdgpu_ring_commit(ring);

	return 0;
}
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2598{
2599 struct amdgpu_ring *ring;
2600 u32 tmp;
2601 u32 rb_bufsz;
2602 u64 rb_addr, rptr_addr;
2603 int r;
2604
2605 WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
2606 if (adev->asic_type != CHIP_HAWAII)
2607 WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2608
2609
2610 WREG32(mmCP_RB_WPTR_DELAY, 0);
2611
2612
2613 WREG32(mmCP_RB_VMID, 0);
2614
2615 WREG32(mmSCRATCH_ADDR, 0);
2616
2617
2618
2619 ring = &adev->gfx.gfx_ring[0];
2620 rb_bufsz = order_base_2(ring->ring_size / 8);
2621 tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2622#ifdef __BIG_ENDIAN
2623 tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
2624#endif
2625 WREG32(mmCP_RB0_CNTL, tmp);
2626
2627
2628 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2629 ring->wptr = 0;
2630 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2631
2632
2633 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2634 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2635 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2636
2637
2638 WREG32(mmSCRATCH_UMSK, 0);
2639
2640 mdelay(1);
2641 WREG32(mmCP_RB0_CNTL, tmp);
2642
2643 rb_addr = ring->gpu_addr >> 8;
2644 WREG32(mmCP_RB0_BASE, rb_addr);
2645 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2646
2647
2648 gfx_v7_0_cp_gfx_start(adev);
2649 r = amdgpu_ring_test_helper(ring);
2650 if (r)
2651 return r;
2652
2653 return 0;
2654}
2655
/* Read the CP read pointer from its write-back slot in system memory. */
static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
2660
/* Gfx ring write pointer lives in the CP_RB0_WPTR register. */
static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32(mmCP_RB0_WPTR);
}
2667
/* Commit the gfx ring write pointer; read back to post the write. */
static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	(void)RREG32(mmCP_RB0_WPTR); /* flush the posted register write */
}
2675
/* Compute ring write pointer is kept in its write-back slot. */
static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
2681
/* Update the write-back copy of the wptr, then ring the doorbell. */
static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2700{
2701 if (enable)
2702 WREG32(mmCP_MEC_CNTL, 0);
2703 else
2704 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
2705 CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2706 udelay(50);
2707}
2708
2709
2710
2711
2712
2713
2714
2715
2716
/**
 * gfx_v7_0_cp_compute_load_microcode - load the compute CP microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Halts the MECs and uploads the MEC firmware (and, on Kaveri, the
 * separate MEC2 firmware) into the ucode memories.
 * Returns 0 on success, -EINVAL if a required firmware is missing.
 */
static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
	adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(
					mec_hdr->ucode_feature_version);

	/* halt the MECs before touching their ucode memory */
	gfx_v7_0_cp_compute_enable(adev, false);

	/* MEC1 */
	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);

	if (adev->asic_type == CHIP_KAVERI) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		/* Kaveri has a distinct MEC2 image */
		if (!adev->gfx.mec2_fw)
			return -EINVAL;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
		adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version = le32_to_cpu(
				mec2_hdr->ucode_feature_version);

		/* MEC2 */
		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
2779{
2780 int i;
2781
2782 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2783 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2784
2785 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
2786 }
2787}
2788
/* Free the MEC HPD/EOP buffer object. */
static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
2793
/**
 * gfx_v7_0_mec_init - allocate and clear the MEC HPD/EOP buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Acquires the compute queues, then allocates one VRAM buffer holding
 * the per-pipe HPD/EOP areas for all MECs and zeroes it.
 * Returns 0 on success, negative error code on allocation failure.
 */
static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	/* one slot per pipe across all MECs; the *2 mirrors the per-pipe
	 * offset computation in gfx_v7_0_compute_pipe_init */
	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
		* GFX7_MEC_HPD_SIZE * 2;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
		gfx_v7_0_mec_fini(adev);
		return r;
	}

	/* clear memory.  Not sure the hw has to be zeroed, but can't hurt */
	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
2828
/*
 * Shadow of the CP_HQD_* / CP_MQD_* register block, laid out in the same
 * order as the hardware registers so it can be copied with a register loop.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
2867
/*
 * Program the HPD/EOP base address, VMID and size for one compute pipe.
 * Selects the pipe via SRBM (under srbm_mutex) and restores the default
 * selection before returning.
 */
static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
				       int mec, int pipe)
{
	u64 eop_gpu_addr;
	u32 tmp;
	/* offset into the shared HPD/EOP bo; must match the sizing in
	 * gfx_v7_0_mec_init (GFX7_MEC_HPD_SIZE * 2 per pipe) */
	size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
			    * GFX7_MEC_HPD_SIZE * 2;

	mutex_lock(&adev->srbm_mutex);
	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;

	/* me 0 is the gfx CP; compute MECs are me 1 and up */
	cik_srbm_select(adev, mec + 1, pipe, 0, 0);

	/* write the EOP addr */
	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(mmCP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
	WREG32(mmCP_HPD_EOP_CONTROL, tmp);

	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2897
/*
 * Deactivate the currently SRBM-selected HQD: request a dequeue, poll
 * until the queue goes inactive, then reset the queue pointers.
 * Caller must hold srbm_mutex with the target queue selected.
 * Returns 0 on success, -ETIMEDOUT if the queue never drained.
 */
static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
{
	int i;

	/* disable the queue if it's active */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (i == adev->usec_timeout)
			return -ETIMEDOUT;

		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(mmCP_HQD_PQ_RPTR, 0);
		WREG32(mmCP_HQD_PQ_WPTR, 0);
	}

	return 0;
}
2921
/**
 * gfx_v7_0_mqd_init - populate a memory queue descriptor
 *
 * @adev:         amdgpu_device pointer
 * @mqd:          CPU pointer to the MQD to fill
 * @mqd_gpu_addr: GPU address of the MQD
 * @ring:         compute ring the MQD describes
 *
 * Fills the MQD from a mix of fixed values, ring parameters and the
 * current CP_HQD_* register state for the SRBM-selected queue.  Caller
 * must hold srbm_mutex with the target queue selected.
 */
static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
			      struct cik_mqd *mqd,
			      uint64_t mqd_gpu_addr,
			      struct amdgpu_ring *ring)
{
	u64 hqd_gpu_addr;
	u64 wb_gpu_addr;

	/* init the mqd struct */
	memset(mqd, 0, sizeof(struct cik_mqd));

	mqd->header = 0xC0310800;
	/* enable all SIMDs on every SE for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;

	/* enable doorbell? */
	mqd->cp_hqd_pq_doorbell_control =
		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
	if (ring->use_doorbell)
		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
	else
		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* set MQD vmid to 0 */
	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
	mqd->cp_hqd_pq_control &=
		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);

	mqd->cp_hqd_pq_control |=
		order_base_2(ring->ring_size / 8);
	mqd->cp_hqd_pq_control |=
		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
	mqd->cp_hqd_pq_control |=
		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
#endif
	mqd->cp_hqd_pq_control &=
		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
	mqd->cp_hqd_pq_control |=
		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		mqd->cp_hqd_pq_doorbell_control =
			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		mqd->cp_hqd_pq_doorbell_control &=
			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
		mqd->cp_hqd_pq_doorbell_control |=
			(ring->doorbell_index <<
			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
		mqd->cp_hqd_pq_doorbell_control |=
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
		mqd->cp_hqd_pq_doorbell_control &=
			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);

	} else {
		mqd->cp_hqd_pq_doorbell_control = 0;
	}

	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	/* defaults: capture the current register state for the rest */
	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);

	/* activate the queue */
	mqd->cp_hqd_active = 1;
}
3041
/*
 * Write an MQD's register image into the HQD registers of the currently
 * SRBM-selected queue: disable wptr polling, program everything except
 * MQD base / ACTIVE first, then the base registers and ACTIVE last so the
 * queue only goes live once fully configured.
 * Always returns 0.
 */
int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
{
	uint32_t tmp;
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
3066
/*
 * Allocate, initialize and commit the MQD for one compute ring.
 * Returns 0 on success, negative error code on BO allocation failure.
 */
static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
{
	int r;
	u64 mqd_gpu_addr;
	struct cik_mqd *mqd;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				      &mqd_gpu_addr, (void **)&mqd);
	if (r) {
		dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
		return r;
	}

	/* select the queue, then init/deactivate/commit under srbm_mutex */
	mutex_lock(&adev->srbm_mutex);
	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
	gfx_v7_0_mqd_deactivate(adev);
	gfx_v7_0_mqd_commit(adev, mqd);

	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	amdgpu_bo_kunmap(ring->mqd_obj);
	amdgpu_bo_unreserve(ring->mqd_obj);
	return 0;
}
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
/**
 * gfx_v7_0_cp_compute_resume - bring up the compute queues
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes every compute pipe's HPD/EOP state, sets up the MQD for
 * each compute ring, enables the MECs and tests each ring (the per-ring
 * test helper marks the ring's scheduler ready/not-ready itself, so its
 * return value is intentionally not treated as fatal here).
 * Returns 0 on success, negative error code on queue-init failure.
 */
static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	struct amdgpu_ring *ring;

	/* fix up chicken bits */
	tmp = RREG32(mmCP_CPF_DEBUG);
	tmp |= (1 << 23); /* NOTE(review): undocumented chicken bit — keep */
	WREG32(mmCP_CPF_DEBUG, tmp);

	/* init all pipes (even the ones we don't own) */
	for (i = 0; i < adev->gfx.mec.num_mec; i++)
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
			gfx_v7_0_compute_pipe_init(adev, i, j);

	/* init the queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		r = gfx_v7_0_compute_queue_init(adev, i);
		if (r) {
			gfx_v7_0_cp_compute_fini(adev);
			return r;
		}
	}

	gfx_v7_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
3140
/* Enable/halt both the gfx and compute command processors together. */
static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v7_0_cp_gfx_enable(adev, enable);
	gfx_v7_0_cp_compute_enable(adev, enable);
}
3146
/* Load the gfx (PFP/ME/CE) and compute (MEC) microcode into the CP.
 * Returns 0 on success, or the first failing loader's error code.
 */
static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
{
	int r = gfx_v7_0_cp_gfx_load_microcode(adev);

	if (!r)
		r = gfx_v7_0_cp_compute_load_microcode(adev);

	return r;
}
3160
3161static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3162 bool enable)
3163{
3164 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3165
3166 if (enable)
3167 tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3168 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3169 else
3170 tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3171 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3172 WREG32(mmCP_INT_CNTL_RING0, tmp);
3173}
3174
/* Load CP microcode and resume the gfx and compute pipelines, keeping the
 * GUI idle interrupts masked for the duration of the bring-up.
 */
static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v7_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v7_0_cp_load_microcode(adev);
	if (r)
		return r;

	r = gfx_v7_0_cp_gfx_resume(adev);
	if (r)
		return r;
	r = gfx_v7_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v7_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
3196
3197
3198
3199
3200
3201
3202
3203
3204
/* Emit a WAIT_REG_MEM that polls this ring's fence address until it
 * reaches the latest synced sequence number; on gfx rings follow up with
 * two SWITCH_BUFFER packets so the CE does not run ahead of the ME.
 */
static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* mem space 1 = memory, function 3 = ">=" comparison */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) |
				 WAIT_REG_MEM_FUNCTION(3) |
				 WAIT_REG_MEM_ENGINE(usepfp)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned lo addr */
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);        /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4);          /* poll interval */

	if (usepfp) {
		/* synchronize the CE with the ME */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
/* Emit a VM TLB flush: let the GMC helper emit the flush, then wait for
 * VM_INVALIDATE_REQUEST to read back 0 to confirm completion. Gfx rings
 * also resynchronize the PFP and flush CE buffers afterwards.
 */
static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete: poll the register (mem
	 * space 0) for equality (function 0) with reference value 0 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |
				 WAIT_REG_MEM_FUNCTION(0) |
				 WAIT_REG_MEM_ENGINE(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);    /* reference value */
	amdgpu_ring_write(ring, 0);    /* compare mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);

		/* synchronize the CE with the ME */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
3275
/* Emit a WRITE_DATA packet that writes @val to register @reg from within
 * the ring (dst_sel 0 = register space). Gfx rings write via the PFP.
 */
static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
				    uint32_t reg, uint32_t val)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0); /* upper address dword, unused for regs */
	amdgpu_ring_write(ring, val);
}
3288
3289
3290
3291
3292
3293
/* One-time RLC setup: pick the per-ASIC save/restore register list,
 * clear-state data and CP table size, then allocate the save/restore,
 * clear-state and CP-table buffers and program the initial SPM VMID.
 */
static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
{
	const u32 *src_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	/* Only APUs carry a save/restore list; Kaveri (spectre) differs
	 * from the other APUs (kalindi). */
	if (adev->flags & AMD_IS_APU) {
		if (adev->asic_type == CHIP_KAVERI) {
			adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
			adev->gfx.rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
			adev->gfx.rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	adev->gfx.rlc.cs_data = ci_cs_data;
	adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048);
	adev->gfx.rlc.cp_table_size += 64 * 1024; /* JT + GDS */

	src_ptr = adev->gfx.rlc.reg_list;
	dws = adev->gfx.rlc.reg_list_size;
	dws += (5 * 16) + 48 + 48 + 64; /* extra dwords for SR buffer layout */

	cs_data = adev->gfx.rlc.cs_data;

	if (src_ptr) {
		/* save restore block */
		r = amdgpu_gfx_rlc_init_sr(adev, dws);
		if (r)
			return r;
	}

	if (cs_data) {
		/* clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->gfx.rlc.cp_table_size) {
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf, so SPM traffic is not VMID-bound */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}
3349
3350static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3351{
3352 u32 tmp;
3353
3354 tmp = RREG32(mmRLC_LB_CNTL);
3355 if (enable)
3356 tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3357 else
3358 tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3359 WREG32(mmRLC_LB_CNTL, tmp);
3360}
3361
/* Busy-wait until the RLC serdes masters report idle: first the per-CU
 * masters for every SE/SH, then the non-CU masters. Each poll is bounded
 * by adev->usec_timeout; a timeout simply falls through.
 */
static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* grbm_idx_mutex protects the SE/SH selection below. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* Restore broadcast selection before releasing the lock. */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3391
3392static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3393{
3394 u32 tmp;
3395
3396 tmp = RREG32(mmRLC_CNTL);
3397 if (tmp != rlc)
3398 WREG32(mmRLC_CNTL, rlc);
3399}
3400
/* Halt the RLC F32 core if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it via
 * gfx_v7_0_update_rlc().
 */
static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
{
	u32 data, orig;

	orig = data = RREG32(mmRLC_CNTL);

	if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
		u32 i;

		data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
		WREG32(mmRLC_CNTL, data);

		/* bounded wait for the RLC GPM to report idle */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
				break;
			udelay(1);
		}

		gfx_v7_0_wait_for_rlc_serdes(adev);
	}

	return orig;
}
3424
/* amdgpu_rlc_funcs callback; on gfx v7 the RLC is always reported as
 * enabled.
 */
static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
{
	return true;
}
3429
/* Request RLC safe mode through RLC_GPR_REG2 and wait until the GPU power
 * and clock status bits indicate it took effect, then until the request
 * bit clears. Both waits are bounded by adev->usec_timeout.
 */
static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
{
	u32 tmp, i, mask;

	/* bit 0 = request valid, bit 1 set = enter safe mode
	 * (NOTE(review): bit meanings inferred from the paired
	 * unset_safe_mode below - confirm against the RLC spec) */
	tmp = 0x1 | (1 << 1);
	WREG32(mmRLC_GPR_REG2, tmp);

	mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
		RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to acknowledge (request bit cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
			break;
		udelay(1);
	}
}
3451
3452static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
3453{
3454 u32 tmp;
3455
3456 tmp = 0x1 | (0 << 1);
3457 WREG32(mmRLC_GPR_REG2, tmp);
3458}
3459
3460
3461
3462
3463
3464
3465
3466
/* Stop the RLC: clear RLC_CNTL, mask GUI idle interrupts and wait for the
 * serdes masters to drain.
 */
static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32(mmRLC_CNTL, 0);

	gfx_v7_0_enable_gui_idle_interrupt(adev, false);

	gfx_v7_0_wait_for_rlc_serdes(adev);
}
3475
3476
3477
3478
3479
3480
3481
3482
/* Start the RLC F32 core, re-enable GUI idle interrupts and give the RLC
 * a short settling delay.
 */
static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);

	gfx_v7_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
3491
3492static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
3493{
3494 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3495
3496 tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3497 WREG32(mmGRBM_SOFT_RESET, tmp);
3498 udelay(50);
3499 tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3500 WREG32(mmGRBM_SOFT_RESET, tmp);
3501 udelay(50);
3502}
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
/* Resume the RLC: stop and reset it, program powergating and load
 * balancing state, upload the RLC microcode dword by dword, then start
 * the F32 core. Returns -EINVAL when no RLC firmware was loaded.
 */
static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	u32 tmp;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);
	adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(
					hdr->ucode_feature_version);

	adev->gfx.rlc.funcs->stop(adev);

	/* disable CGCG/CGLS (low two bits) while the RLC is reloaded */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);

	adev->gfx.rlc.funcs->reset(adev);

	gfx_v7_0_init_pg(adev);

	WREG32(mmRLC_LB_CNTR_INIT, 0);
	WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);

	/* program load-balance parameters with SE/SH broadcast selected */
	mutex_lock(&adev->grbm_idx_mutex);
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(mmRLC_LB_PARAMS, 0x00600408);
	WREG32(mmRLC_LB_CNTL, 0x80000004);
	mutex_unlock(&adev->grbm_idx_mutex);

	WREG32(mmRLC_MC_CNTL, 0);
	WREG32(mmRLC_UCODE_CNTL, 0);

	/* upload the microcode image at GPM ucode address 0, then store the
	 * firmware version in the address register (hw convention) */
	fw_data = (const __le32 *)
		(adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	/* XXX - find out what chips support lbpw */
	gfx_v7_0_enable_lbpw(adev, false);

	if (adev->asic_type == CHIP_BONAIRE)
		WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
3570
3571static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
3572{
3573 u32 data;
3574
3575 data = RREG32(mmRLC_SPM_VMID);
3576
3577 data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
3578 data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
3579
3580 WREG32(mmRLC_SPM_VMID, data);
3581}
3582
/* Enable/disable coarse grain clock gating (CGCG) and CGLS. Enabling
 * reprograms the RLC serdes with the RLC halted; disabling clears the
 * enable bits directly.
 */
static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		gfx_v7_0_enable_gui_idle_interrupt(adev, true);

		/* halt the RLC while the serdes override is written */
		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
			RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
			RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&adev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL value */
		gfx_v7_0_update_rlc(adev, tmp);

		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		if (orig != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

	} else {
		gfx_v7_0_enable_gui_idle_interrupt(adev, false);

		/* repeated reads of CB_CGTT_SCLK_CTRL - presumably a delay /
		 * posted-write flush before gating is turned off; TODO
		 * confirm intent */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (orig != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
	}
}
3625
/* Enable/disable medium grain clock gating (MGCG) and the related
 * mem-light-sleep and CGTS features, reprogramming the RLC serdes with
 * the RLC halted in both directions.
 */
static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CP memory light sleep, gated on MGLS + CP_LS support */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (orig != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override
		 * (NOTE(review): bit semantics not visible here) */
		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
			RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, data);
		mutex_unlock(&adev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL value */
		gfx_v7_0_update_rlc(adev, tmp);

		/* CGTS: per-SM clock gating configuration */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (orig != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on to disable MGCG */
		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		if (orig != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, data);
		mutex_unlock(&adev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL value */
		gfx_v7_0_update_rlc(adev, tmp);
	}
}
3710
/* Toggle all gfx clock gating. MGCG is enabled before CGCG and disabled
 * after it - the ordering is deliberate, so keep it.
 */
static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
			       bool enable)
{
	gfx_v7_0_enable_gui_idle_interrupt(adev, false);

	if (enable) {
		gfx_v7_0_enable_mgcg(adev, true);
		gfx_v7_0_enable_cgcg(adev, true);
	} else {
		gfx_v7_0_enable_cgcg(adev, false);
		gfx_v7_0_enable_mgcg(adev, false);
	}
	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
}
3725
3726static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
3727 bool enable)
3728{
3729 u32 data, orig;
3730
3731 orig = data = RREG32(mmRLC_PG_CNTL);
3732 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3733 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3734 else
3735 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3736 if (orig != data)
3737 WREG32(mmRLC_PG_CNTL, data);
3738}
3739
3740static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
3741 bool enable)
3742{
3743 u32 data, orig;
3744
3745 orig = data = RREG32(mmRLC_PG_CNTL);
3746 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3747 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3748 else
3749 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3750 if (orig != data)
3751 WREG32(mmRLC_PG_CNTL, data);
3752}
3753
3754static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
3755{
3756 u32 data, orig;
3757
3758 orig = data = RREG32(mmRLC_PG_CNTL);
3759 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
3760 data &= ~0x8000;
3761 else
3762 data |= 0x8000;
3763 if (orig != data)
3764 WREG32(mmRLC_PG_CNTL, data);
3765}
3766
3767static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
3768{
3769 u32 data, orig;
3770
3771 orig = data = RREG32(mmRLC_PG_CNTL);
3772 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
3773 data &= ~0x2000;
3774 else
3775 data |= 0x2000;
3776 if (orig != data)
3777 WREG32(mmRLC_PG_CNTL, data);
3778}
3779
3780static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
3781{
3782 if (adev->asic_type == CHIP_KAVERI)
3783 return 5;
3784 else
3785 return 4;
3786}
3787
/* Toggle gfx coarse-grain powergating and the RLC auto-PG feature, gated
 * on GFX_PG support.
 */
static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
				     bool enable)
{
	u32 data, orig;

	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(mmRLC_PG_CNTL);
		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
		if (orig != data)
			WREG32(mmRLC_PG_CNTL, data);

		orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
		data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
		if (orig != data)
			WREG32(mmRLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(mmRLC_PG_CNTL);
		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
		if (orig != data)
			WREG32(mmRLC_PG_CNTL, data);

		orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
		data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
		if (orig != data)
			WREG32(mmRLC_AUTO_PG_CTRL, data);

		/* dummy read of DB_RENDER_CONTROL; result is deliberately
		 * discarded - presumably forces the GPU out of the gated
		 * state (TODO confirm) */
		data = RREG32(mmDB_RENDER_CONTROL);
	}
}
3817
3818static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
3819 u32 bitmap)
3820{
3821 u32 data;
3822
3823 if (!bitmap)
3824 return;
3825
3826 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3827 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3828
3829 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
3830}
3831
3832static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3833{
3834 u32 data, mask;
3835
3836 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
3837 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
3838
3839 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3840 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3841
3842 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
3843
3844 return (~data) & mask;
3845}
3846
3847static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
3848{
3849 u32 tmp;
3850
3851 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3852
3853 tmp = RREG32(mmRLC_MAX_PG_CU);
3854 tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
3855 tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
3856 WREG32(mmRLC_MAX_PG_CU, tmp);
3857}
3858
3859static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
3860 bool enable)
3861{
3862 u32 data, orig;
3863
3864 orig = data = RREG32(mmRLC_PG_CNTL);
3865 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
3866 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3867 else
3868 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3869 if (orig != data)
3870 WREG32(mmRLC_PG_CNTL, data);
3871}
3872
3873static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
3874 bool enable)
3875{
3876 u32 data, orig;
3877
3878 orig = data = RREG32(mmRLC_PG_CNTL);
3879 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
3880 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3881 else
3882 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3883 if (orig != data)
3884 WREG32(mmRLC_PG_CNTL, data);
3885}
3886
3887#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
3888#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
3889
/* One-time powergating setup: publish the clear-state descriptor and the
 * save/restore register list to RLC GPM scratch space, then program the
 * PG source, save/restore base, jump table and the various PG delay and
 * idle-threshold registers.
 */
static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
{
	u32 data, orig;
	u32 i;

	if (adev->gfx.rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
		WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
		WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero out the descriptor */
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
	}
	if (adev->gfx.rlc.reg_list) {
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
			WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
	}

	orig = data = RREG32(mmRLC_PG_CNTL);
	data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
	if (orig != data)
		WREG32(mmRLC_PG_CNTL, data);

	WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
	WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);

	/* idle poll count for the ring wptr polling */
	data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
	data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
	data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
	WREG32(mmCP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(mmRLC_PG_DELAY, data);

	/* low byte of RLC_PG_DELAY_2 set to 3 */
	data = RREG32(mmRLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(mmRLC_PG_DELAY_2, data);

	data = RREG32(mmRLC_AUTO_PG_CTRL);
	data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
	data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
	WREG32(mmRLC_AUTO_PG_CTRL, data);

}
3938
/* Toggle the three gfx powergating features (CGPG, static MGPG, dynamic
 * MGPG) together.
 */
static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
{
	gfx_v7_0_enable_gfx_cgpg(adev, enable);
	gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
	gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
}
3945
3946static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
3947{
3948 u32 count = 0;
3949 const struct cs_section_def *sect = NULL;
3950 const struct cs_extent_def *ext = NULL;
3951
3952 if (adev->gfx.rlc.cs_data == NULL)
3953 return 0;
3954
3955
3956 count += 2;
3957
3958 count += 3;
3959
3960 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
3961 for (ext = sect->section; ext->extent != NULL; ++ext) {
3962 if (sect->id == SECT_CONTEXT)
3963 count += 2 + ext->reg_count;
3964 else
3965 return 0;
3966 }
3967 }
3968
3969 count += 4;
3970
3971 count += 2;
3972
3973 count += 2;
3974
3975 return count;
3976}
3977
/* Fill @buffer with the clear-state PM4 stream: preamble begin, context
 * control, every SECT_CONTEXT register extent, the per-ASIC
 * PA_SC_RASTER_CONFIG pair, preamble end and a CLEAR_STATE packet.
 * Packet counts must stay in sync with gfx_v7_0_get_csb_size().
 */
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context registers are expected */
				return;
			}
		}
	}

	/* per-ASIC raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
4043
/* Enable powergating at init time if any PG feature is supported: sclk
 * slowdown first, then (for full GFX_PG) CGPG setup plus CP/GDS PG, the
 * always-on CU mask, and finally the gfx PG enables.
 */
static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
		gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
			gfx_v7_0_init_gfx_cgpg(adev);
			gfx_v7_0_enable_cp_pg(adev, true);
			gfx_v7_0_enable_gds_pg(adev, true);
		}
		gfx_v7_0_init_ao_cu_mask(adev);
		gfx_v7_0_update_gfx_pg(adev, true);
	}
}
4063
/* Tear down powergating: disable the gfx PG enables first, then CP/GDS
 * PG when full GFX_PG was in use (reverse of gfx_v7_0_init_pg()).
 */
static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v7_0_update_gfx_pg(adev, false);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
			gfx_v7_0_enable_cp_pg(adev, false);
			gfx_v7_0_enable_gds_pg(adev, false);
		}
	}
}
4079
4080
4081
4082
4083
4084
4085
4086
4087
/* Read the 64-bit GPU clock counter: a write to the capture register
 * latches the counter, then the LSB/MSB halves are read back. The mutex
 * serializes the capture/read sequence against concurrent readers.
 */
static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
4099
/* Emit WRITE_DATA packets programming the per-VMID GDS memory base/size,
 * GWS and OA allocations for a GDS switch.
 */
static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4138
/* Attempt soft recovery of a hung ring by issuing an SQ_CMD targeted at
 * the given VMID (CMD 0x03 / MODE 0x01 - presumably a "kill waves"
 * operation; confirm against the SQ_CMD spec).
 */
static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}
4150
4151static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
4152{
4153 WREG32(mmSQ_IND_INDEX,
4154 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4155 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4156 (address << SQ_IND_INDEX__INDEX__SHIFT) |
4157 (SQ_IND_INDEX__FORCE_READ_MASK));
4158 return RREG32(mmSQ_IND_DATA);
4159}
4160
4161static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
4162 uint32_t wave, uint32_t thread,
4163 uint32_t regno, uint32_t num, uint32_t *out)
4164{
4165 WREG32(mmSQ_IND_INDEX,
4166 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4167 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4168 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
4169 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
4170 (SQ_IND_INDEX__FORCE_READ_MASK) |
4171 (SQ_IND_INDEX__AUTO_INCR_MASK));
4172 while (num--)
4173 *(out++) = RREG32(mmSQ_IND_DATA);
4174}
4175
/* Dump the fixed wave-state registers of one wave into @dst and report
 * how many fields were written. The order here defines the dump layout
 * consumed by userspace tooling - do not reorder.
 */
static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* field 0 is reserved/always 0 */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
4199
/* Read @size SGPRs of one wave, starting at @start, into @dst. */
static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
4208
/* amdgpu_gfx_funcs callback; thin wrapper around the SRBM me/pipe/queue
 * selection. Caller is responsible for srbm_mutex locking.
 */
static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q, u32 vm)
{
	cik_srbm_select(adev, me, pipe, q, vm);
}
4214
/* GFX helper callbacks exposed to the rest of the driver (debugfs, KFD). */
static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v7_0_select_se_sh,
	.read_wave_data = &gfx_v7_0_read_wave_data,
	.read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
};
4222
/* RLC (RunList Controller) lifecycle callbacks for this IP version. */
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
	.is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
	.set_safe_mode = gfx_v7_0_set_safe_mode,
	.unset_safe_mode = gfx_v7_0_unset_safe_mode,
	.init = gfx_v7_0_rlc_init,
	.get_csb_size = gfx_v7_0_get_csb_size,
	.get_csb_buffer = gfx_v7_0_get_csb_buffer,
	.get_cp_table_num = gfx_v7_0_cp_pg_table_num,
	.resume = gfx_v7_0_rlc_resume,
	.stop = gfx_v7_0_rlc_stop,
	.reset = gfx_v7_0_rlc_reset,
	.start = gfx_v7_0_rlc_start,
	.update_spm_vmid = gfx_v7_0_update_spm_vmid
};
4237
/*
 * gfx_v7_0_early_init - hook up function tables and ring counts
 *
 * Runs before any hardware access: sets the number of gfx/compute rings
 * and installs the gfx, RLC, ring, irq and GDS callback tables.
 * Always returns 0.
 */
static int gfx_v7_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
	gfx_v7_0_set_ring_funcs(adev);
	gfx_v7_0_set_irq_funcs(adev);
	gfx_v7_0_set_gds_init(adev);

	return 0;
}
4252
4253static int gfx_v7_0_late_init(void *handle)
4254{
4255 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4256 int r;
4257
4258 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4259 if (r)
4260 return r;
4261
4262 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4263 if (r)
4264 return r;
4265
4266 return 0;
4267}
4268
/*
 * gfx_v7_0_gpu_early_init - program the per-ASIC gfx configuration
 *
 * Fills adev->gfx.config with the shader engine / pipe / CU topology and
 * FIFO sizes for the detected ASIC, then derives bank/rank counts, the
 * memory row size and the final GB_ADDR_CONFIG value from the memory
 * controller registers.
 */
static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	/* hard-coded golden topology per ASIC */
	switch (adev->asic_type) {
	case CHIP_BONAIRE:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 7;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 11;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* APUs: derive row size from the DIMM address map fuses */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* zero out mappings this code does not handle -
		 * NOTE(review): 0/3/4 and >12 appear to be unsupported
		 * encodings; confirm against the MC fuse documentation */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* any DIMM using encoding 11 implies 2KB rows, otherwise
		 * assume 1KB - NOTE(review): encoding meaning inferred from
		 * this check only */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPUs: row size = 4 * 2^(8 + NOOFCOLS) bits, in KB, capped at 4 */
		tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fold the detected row size into GB_ADDR_CONFIG.ROW_SIZE */
	gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
		break;
	case 2:
		gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
		break;
	case 4:
		gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
4412
4413static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
4414 int mec, int pipe, int queue)
4415{
4416 int r;
4417 unsigned irq_type;
4418 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
4419
4420
4421 ring->me = mec + 1;
4422 ring->pipe = pipe;
4423 ring->queue = queue;
4424
4425 ring->ring_obj = NULL;
4426 ring->use_doorbell = true;
4427 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
4428 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
4429
4430 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4431 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
4432 + ring->pipe;
4433
4434
4435 r = amdgpu_ring_init(adev, ring, 1024,
4436 &adev->gfx.eop_irq, irq_type,
4437 AMDGPU_RING_PRIO_DEFAULT);
4438 if (r)
4439 return r;
4440
4441
4442 return 0;
4443}
4444
/*
 * gfx_v7_0_sw_init - software-side initialization of the GFX block
 *
 * Registers the CP interrupt sources, loads microcode, allocates the RLC
 * and MEC buffer objects, and creates the gfx and compute rings.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v7_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, j, k, r, ring_id;

	/* Kaveri has two MECs; the other CIK parts have one */
	switch (adev->asic_type) {
	case CHIP_KAVERI:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}
	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* CP end-of-pipe event (legacy src id 181) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* privileged register access fault (src id 184) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* privileged instruction fault (src id 185) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	gfx_v7_0_scratch_init(adev);

	r = gfx_v7_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate the buffers backing the compute engine (MEC) */
	r = gfx_v7_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring(s) */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
				     AMDGPU_RING_PRIO_DEFAULT);
		if (r)
			return r;
	}

	/* set up the compute queues; pipe is the innermost loop so
	 * consecutive rings are spread across pipes before queues */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v7_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v7_0_gpu_early_init(adev);

	/* r is 0 here: every failure path above returned early */
	return r;
}
4541
/*
 * gfx_v7_0_sw_fini - tear down everything allocated in sw_init
 *
 * Destroys all rings first, then releases compute, RLC and MEC resources
 * and the clear-state / CP table buffer objects, and finally drops the
 * firmware references. Always returns 0.
 */
static int gfx_v7_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	gfx_v7_0_cp_compute_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	gfx_v7_0_mec_fini(adev);
	/* clear-state buffer (CSB) */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* CP jump table is only allocated when cp_table_size is set */
	if (adev->gfx.rlc.cp_table_size) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v7_0_free_microcode(adev);

	return 0;
}
4567
4568static int gfx_v7_0_hw_init(void *handle)
4569{
4570 int r;
4571 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4572
4573 gfx_v7_0_constants_init(adev);
4574
4575
4576 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
4577
4578 r = adev->gfx.rlc.funcs->resume(adev);
4579 if (r)
4580 return r;
4581
4582 r = gfx_v7_0_cp_resume(adev);
4583 if (r)
4584 return r;
4585
4586 return r;
4587}
4588
/*
 * gfx_v7_0_hw_fini - quiesce the GFX hardware
 *
 * Disables the fault interrupts, halts the CP, stops the RLC and tears
 * down powergating state. Always returns 0.
 */
static int gfx_v7_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v7_0_cp_enable(adev, false);
	adev->gfx.rlc.funcs->stop(adev);
	gfx_v7_0_fini_pg(adev);

	return 0;
}
4601
/* Suspend is a full hardware teardown for this IP block. */
static int gfx_v7_0_suspend(void *handle)
{
	return gfx_v7_0_hw_fini(handle);
}
4608
/* Resume is a full hardware re-init for this IP block. */
static int gfx_v7_0_resume(void *handle)
{
	return gfx_v7_0_hw_init(handle);
}
4615
4616static bool gfx_v7_0_is_idle(void *handle)
4617{
4618 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4619
4620 if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4621 return false;
4622 else
4623 return true;
4624}
4625
4626static int gfx_v7_0_wait_for_idle(void *handle)
4627{
4628 unsigned i;
4629 u32 tmp;
4630 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4631
4632 for (i = 0; i < adev->usec_timeout; i++) {
4633
4634 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4635
4636 if (!tmp)
4637 return 0;
4638 udelay(1);
4639 }
4640 return -ETIMEDOUT;
4641}
4642
/*
 * gfx_v7_0_soft_reset - soft-reset the GFX/CP/RLC engines
 *
 * Reads the GRBM/SRBM status registers to decide which reset bits are
 * needed, then (if any) quiesces PG/CG and the RLC, halts the ME/PFP/CE
 * and MEC micro engines, and pulses the GRBM and SRBM soft-reset bits.
 * Always returns 0.
 */
static int gfx_v7_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy gfx pipeline unit -> reset CP and GFX */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
			GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;

	/* busy CP also requires resetting the GRBM itself via SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
	}

	/* GRBM_STATUS2: RLC busy -> reset RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;

	/* SRBM_STATUS: pending GRBM request -> reset GRBM */
	tmp = RREG32(mmSRBM_STATUS);
	if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* disable PG/CG before touching reset bits */
		gfx_v7_0_fini_pg(adev);
		gfx_v7_0_update_cg(adev, false);

		/* stop the rlc */
		adev->gfx.rlc.funcs->stop(adev);

		/* halt the gfx micro engines: ME, PFP and CE */
		WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);

		/* halt both compute micro engines */
		WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);

		/* assert GRBM reset, hold 50us, then release; the extra
		 * reads after each write post/flush the register access */
		if (grbm_soft_reset) {
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		/* same pulse sequence for the SRBM-side reset */
		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* let the hardware settle after reset */
		udelay(50);
	}
	return 0;
}
4721
4722static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4723 enum amdgpu_interrupt_state state)
4724{
4725 u32 cp_int_cntl;
4726
4727 switch (state) {
4728 case AMDGPU_IRQ_STATE_DISABLE:
4729 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4730 cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4731 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4732 break;
4733 case AMDGPU_IRQ_STATE_ENABLE:
4734 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4735 cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4736 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4737 break;
4738 default:
4739 break;
4740 }
4741}
4742
/*
 * gfx_v7_0_set_compute_eop_interrupt_state - toggle a MEC pipe's EOP irq
 *
 * Maps (me, pipe) to the corresponding CP_ME1_PIPEn_INT_CNTL register and
 * sets or clears its timestamp interrupt enable. Only ME1 is handled
 * here; other ME values are rejected with a debug message.
 */
static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/* pick the per-pipe interrupt control register */
	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
4793
4794static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4795 struct amdgpu_irq_src *src,
4796 unsigned type,
4797 enum amdgpu_interrupt_state state)
4798{
4799 u32 cp_int_cntl;
4800
4801 switch (state) {
4802 case AMDGPU_IRQ_STATE_DISABLE:
4803 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4804 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4805 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4806 break;
4807 case AMDGPU_IRQ_STATE_ENABLE:
4808 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4809 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4810 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4811 break;
4812 default:
4813 break;
4814 }
4815
4816 return 0;
4817}
4818
4819static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4820 struct amdgpu_irq_src *src,
4821 unsigned type,
4822 enum amdgpu_interrupt_state state)
4823{
4824 u32 cp_int_cntl;
4825
4826 switch (state) {
4827 case AMDGPU_IRQ_STATE_DISABLE:
4828 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4829 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4830 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4831 break;
4832 case AMDGPU_IRQ_STATE_ENABLE:
4833 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4834 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4835 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4836 break;
4837 default:
4838 break;
4839 }
4840
4841 return 0;
4842}
4843
/*
 * gfx_v7_0_set_eop_interrupt_state - dispatch an EOP irq enable/disable
 *
 * Fans the per-ring interrupt type out to either the gfx ring helper or
 * the per-MEC-pipe compute helper. Unknown types are ignored.
 * Always returns 0.
 */
static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
4882
/*
 * gfx_v7_0_eop_irq - handle a CP end-of-pipe interrupt
 *
 * Decodes the ME and pipe from the IV entry's ring_id (me in bits [3:2],
 * pipe in bits [1:0]) and processes fences on the matching ring(s):
 * ME0 is the single gfx ring, ME1/ME2 are the compute MECs.
 * Always returns 0.
 */
static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id;
	struct amdgpu_ring *ring;
	int i;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if ((ring->me == me_id) && (ring->pipe == pipe_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
4909
/*
 * gfx_v7_0_fault - route a CP fault to the GPU scheduler
 *
 * Decodes me/pipe from the IV entry's ring_id (same layout as the EOP
 * handler) and calls drm_sched_fault() on every matching ring, which
 * kicks off the scheduler's timeout/recovery handling.
 */
static void gfx_v7_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	struct amdgpu_ring *ring;
	u8 me_id, pipe_id;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if ((ring->me == me_id) && (ring->pipe == pipe_id))
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}
4933
/* IRQ handler: illegal register access by the CP - log and fault the ring. */
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v7_0_fault(adev, entry);
	return 0;
}
4942
/* IRQ handler: illegal CP instruction - log and fault the ring. */
static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v7_0_fault(adev, entry);
	return 0;
}
4952
4953static int gfx_v7_0_set_clockgating_state(void *handle,
4954 enum amd_clockgating_state state)
4955{
4956 bool gate = false;
4957 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4958
4959 if (state == AMD_CG_STATE_GATE)
4960 gate = true;
4961
4962 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4963
4964 if (gate) {
4965 gfx_v7_0_enable_mgcg(adev, true);
4966 gfx_v7_0_enable_cgcg(adev, true);
4967 } else {
4968 gfx_v7_0_enable_cgcg(adev, false);
4969 gfx_v7_0_enable_mgcg(adev, false);
4970 }
4971 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4972
4973 return 0;
4974}
4975
4976static int gfx_v7_0_set_powergating_state(void *handle,
4977 enum amd_powergating_state state)
4978{
4979 bool gate = false;
4980 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4981
4982 if (state == AMD_PG_STATE_GATE)
4983 gate = true;
4984
4985 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4986 AMD_PG_SUPPORT_GFX_SMG |
4987 AMD_PG_SUPPORT_GFX_DMG |
4988 AMD_PG_SUPPORT_CP |
4989 AMD_PG_SUPPORT_GDS |
4990 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4991 gfx_v7_0_update_gfx_pg(adev, gate);
4992 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4993 gfx_v7_0_enable_cp_pg(adev, gate);
4994 gfx_v7_0_enable_gds_pg(adev, gate);
4995 }
4996 }
4997
4998 return 0;
4999}
5000
/*
 * gfx_v7_0_emit_mem_sync - emit a cache flush on the gfx ring
 *
 * Emits a SURFACE_SYNC packet flushing TC, TC L1, and the shader K$/I$
 * over the full address range.
 */
static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE: whole range */
	amdgpu_ring_write(ring, 0);           /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0x0000000A);  /* poll interval */
}
5012
/*
 * gfx_v7_0_emit_mem_sync_compute - emit a cache flush on a compute ring
 *
 * Compute rings use ACQUIRE_MEM instead of SURFACE_SYNC; same cache
 * actions over the full address range.
 */
static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE: whole range */
	amdgpu_ring_write(ring, 0xff);        /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);           /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);           /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);  /* poll interval */
}
5026
/* IP-block lifecycle callbacks registered with the amdgpu IP framework. */
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
	.name = "gfx_v7_0",
	.early_init = gfx_v7_0_early_init,
	.late_init = gfx_v7_0_late_init,
	.sw_init = gfx_v7_0_sw_init,
	.sw_fini = gfx_v7_0_sw_fini,
	.hw_init = gfx_v7_0_hw_init,
	.hw_fini = gfx_v7_0_hw_fini,
	.suspend = gfx_v7_0_suspend,
	.resume = gfx_v7_0_resume,
	.is_idle = gfx_v7_0_is_idle,
	.wait_for_idle = gfx_v7_0_wait_for_idle,
	.soft_reset = gfx_v7_0_soft_reset,
	.set_clockgating_state = gfx_v7_0_set_clockgating_state,
	.set_powergating_state = gfx_v7_0_set_powergating_state,
};
5043
/* Ring callbacks for the gfx ring. emit_frame_size is the worst-case
 * dword count per submission; per-term accounting below is a review
 * annotation matching the listed emit callbacks - verify against them. */
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v7_0_ring_get_rptr,
	.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v7_0_ring_emit_gds_switch */
		7 + /* gfx_v7_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
		3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush */
		5, /* SURFACE_SYNC in gfx_v7_0_emit_mem_sync */
	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
	.test_ring = gfx_v7_0_ring_test_ring,
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
	.soft_recovery = gfx_v7_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v7_0_emit_mem_sync,
};
5077
/* Ring callbacks for the compute (MEC) rings. emit_frame_size accounting
 * below is a review annotation matching the listed emit callbacks -
 * verify against them. */
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v7_0_ring_get_rptr,
	.get_wptr = gfx_v7_0_ring_get_wptr_compute,
	.set_wptr = gfx_v7_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v7_0_ring_emit_gds_switch */
		7 + /* gfx_v7_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
		7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
		7, /* gfx_v7_0_emit_mem_sync_compute */
	.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
	.test_ring = gfx_v7_0_ring_test_ring,
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
	.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
5108
/* Install the ring callback tables on every gfx and compute ring. */
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
}
5118
/* Interrupt source callbacks: CP end-of-pipe. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
	.set = gfx_v7_0_set_eop_interrupt_state,
	.process = gfx_v7_0_eop_irq,
};
5123
/* Interrupt source callbacks: privileged register access fault. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
	.set = gfx_v7_0_set_priv_reg_fault_state,
	.process = gfx_v7_0_priv_reg_irq,
};
5128
/* Interrupt source callbacks: privileged instruction fault. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
	.set = gfx_v7_0_set_priv_inst_fault_state,
	.process = gfx_v7_0_priv_inst_irq,
};
5133
/* Register the three GFX interrupt sources with their callback tables. */
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP type per CP ring */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
}
5145
/* Record GDS/GWS/OA sizes; GDS size and max wave id come from hardware. */
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
{
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
5154
5155
/*
 * gfx_v7_0_get_cu_info - collect the active/always-on CU topology
 *
 * Walks every shader engine / shader array under grbm_idx_mutex, applies
 * the user-provided CU disable masks, and fills adev->gfx.cu_info with
 * the active CU bitmaps, the always-on CU mask and fixed per-CU limits.
 */
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	/* APUs keep only 2 CUs always-on; dGPUs keep a full SH's worth */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	memset(cu_info, 0, sizeof(*cu_info));

	/* fetch user-requested CU disable masks (4 SEs x 2 SHs) */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* point GRBM register accesses at this SE/SH */
			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v7_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* the first ao_cu_num active CUs count as always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast mode */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
5211
/*
 * IP block version table: all gfx v7 minor revisions (7.0 through 7.3)
 * are backed by the same gfx_v7_0 implementation.
 */
const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 2,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 3,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5247