1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/firmware.h>
24#include <drm/drmP.h>
25#include "amdgpu.h"
26#include "amdgpu_ih.h"
27#include "amdgpu_gfx.h"
28#include "cikd.h"
29#include "cik.h"
30#include "cik_structs.h"
31#include "atom.h"
32#include "amdgpu_ucode.h"
33#include "clearstate_ci.h"
34
35#include "dce/dce_8_0_d.h"
36#include "dce/dce_8_0_sh_mask.h"
37
38#include "bif/bif_4_1_d.h"
39#include "bif/bif_4_1_sh_mask.h"
40
41#include "gca/gfx_7_0_d.h"
42#include "gca/gfx_7_2_enum.h"
43#include "gca/gfx_7_2_sh_mask.h"
44
45#include "gmc/gmc_7_0_d.h"
46#include "gmc/gmc_7_0_sh_mask.h"
47
48#include "oss/oss_2_0_d.h"
49#include "oss/oss_2_0_sh_mask.h"
50
51#define NUM_SIMD_PER_CU 0x4
52
53#define GFX7_NUM_GFX_RINGS 1
54#define GFX7_MEC_HPD_SIZE 2048
55
56static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
57static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
58static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
59
60MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
61MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
62MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
63MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
64MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");
65
66MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
67MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
68MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
69MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
70MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");
71
72MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
73MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
74MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
75MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
76MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
77MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");
78
79MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
80MODULE_FIRMWARE("amdgpu/kabini_me.bin");
81MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
82MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
83MODULE_FIRMWARE("amdgpu/kabini_mec.bin");
84
85MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
86MODULE_FIRMWARE("amdgpu/mullins_me.bin");
87MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
88MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
89MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
90
91static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
92{
93 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
94 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
95 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
96 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
97 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
98 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
99 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
100 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
101 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
102 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
103 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
104 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
105 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
106 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
107 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
108 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
109};
110
111static const u32 spectre_rlc_save_restore_register_list[] =
112{
113 (0x0e00 << 16) | (0xc12c >> 2),
114 0x00000000,
115 (0x0e00 << 16) | (0xc140 >> 2),
116 0x00000000,
117 (0x0e00 << 16) | (0xc150 >> 2),
118 0x00000000,
119 (0x0e00 << 16) | (0xc15c >> 2),
120 0x00000000,
121 (0x0e00 << 16) | (0xc168 >> 2),
122 0x00000000,
123 (0x0e00 << 16) | (0xc170 >> 2),
124 0x00000000,
125 (0x0e00 << 16) | (0xc178 >> 2),
126 0x00000000,
127 (0x0e00 << 16) | (0xc204 >> 2),
128 0x00000000,
129 (0x0e00 << 16) | (0xc2b4 >> 2),
130 0x00000000,
131 (0x0e00 << 16) | (0xc2b8 >> 2),
132 0x00000000,
133 (0x0e00 << 16) | (0xc2bc >> 2),
134 0x00000000,
135 (0x0e00 << 16) | (0xc2c0 >> 2),
136 0x00000000,
137 (0x0e00 << 16) | (0x8228 >> 2),
138 0x00000000,
139 (0x0e00 << 16) | (0x829c >> 2),
140 0x00000000,
141 (0x0e00 << 16) | (0x869c >> 2),
142 0x00000000,
143 (0x0600 << 16) | (0x98f4 >> 2),
144 0x00000000,
145 (0x0e00 << 16) | (0x98f8 >> 2),
146 0x00000000,
147 (0x0e00 << 16) | (0x9900 >> 2),
148 0x00000000,
149 (0x0e00 << 16) | (0xc260 >> 2),
150 0x00000000,
151 (0x0e00 << 16) | (0x90e8 >> 2),
152 0x00000000,
153 (0x0e00 << 16) | (0x3c000 >> 2),
154 0x00000000,
155 (0x0e00 << 16) | (0x3c00c >> 2),
156 0x00000000,
157 (0x0e00 << 16) | (0x8c1c >> 2),
158 0x00000000,
159 (0x0e00 << 16) | (0x9700 >> 2),
160 0x00000000,
161 (0x0e00 << 16) | (0xcd20 >> 2),
162 0x00000000,
163 (0x4e00 << 16) | (0xcd20 >> 2),
164 0x00000000,
165 (0x5e00 << 16) | (0xcd20 >> 2),
166 0x00000000,
167 (0x6e00 << 16) | (0xcd20 >> 2),
168 0x00000000,
169 (0x7e00 << 16) | (0xcd20 >> 2),
170 0x00000000,
171 (0x8e00 << 16) | (0xcd20 >> 2),
172 0x00000000,
173 (0x9e00 << 16) | (0xcd20 >> 2),
174 0x00000000,
175 (0xae00 << 16) | (0xcd20 >> 2),
176 0x00000000,
177 (0xbe00 << 16) | (0xcd20 >> 2),
178 0x00000000,
179 (0x0e00 << 16) | (0x89bc >> 2),
180 0x00000000,
181 (0x0e00 << 16) | (0x8900 >> 2),
182 0x00000000,
183 0x3,
184 (0x0e00 << 16) | (0xc130 >> 2),
185 0x00000000,
186 (0x0e00 << 16) | (0xc134 >> 2),
187 0x00000000,
188 (0x0e00 << 16) | (0xc1fc >> 2),
189 0x00000000,
190 (0x0e00 << 16) | (0xc208 >> 2),
191 0x00000000,
192 (0x0e00 << 16) | (0xc264 >> 2),
193 0x00000000,
194 (0x0e00 << 16) | (0xc268 >> 2),
195 0x00000000,
196 (0x0e00 << 16) | (0xc26c >> 2),
197 0x00000000,
198 (0x0e00 << 16) | (0xc270 >> 2),
199 0x00000000,
200 (0x0e00 << 16) | (0xc274 >> 2),
201 0x00000000,
202 (0x0e00 << 16) | (0xc278 >> 2),
203 0x00000000,
204 (0x0e00 << 16) | (0xc27c >> 2),
205 0x00000000,
206 (0x0e00 << 16) | (0xc280 >> 2),
207 0x00000000,
208 (0x0e00 << 16) | (0xc284 >> 2),
209 0x00000000,
210 (0x0e00 << 16) | (0xc288 >> 2),
211 0x00000000,
212 (0x0e00 << 16) | (0xc28c >> 2),
213 0x00000000,
214 (0x0e00 << 16) | (0xc290 >> 2),
215 0x00000000,
216 (0x0e00 << 16) | (0xc294 >> 2),
217 0x00000000,
218 (0x0e00 << 16) | (0xc298 >> 2),
219 0x00000000,
220 (0x0e00 << 16) | (0xc29c >> 2),
221 0x00000000,
222 (0x0e00 << 16) | (0xc2a0 >> 2),
223 0x00000000,
224 (0x0e00 << 16) | (0xc2a4 >> 2),
225 0x00000000,
226 (0x0e00 << 16) | (0xc2a8 >> 2),
227 0x00000000,
228 (0x0e00 << 16) | (0xc2ac >> 2),
229 0x00000000,
230 (0x0e00 << 16) | (0xc2b0 >> 2),
231 0x00000000,
232 (0x0e00 << 16) | (0x301d0 >> 2),
233 0x00000000,
234 (0x0e00 << 16) | (0x30238 >> 2),
235 0x00000000,
236 (0x0e00 << 16) | (0x30250 >> 2),
237 0x00000000,
238 (0x0e00 << 16) | (0x30254 >> 2),
239 0x00000000,
240 (0x0e00 << 16) | (0x30258 >> 2),
241 0x00000000,
242 (0x0e00 << 16) | (0x3025c >> 2),
243 0x00000000,
244 (0x4e00 << 16) | (0xc900 >> 2),
245 0x00000000,
246 (0x5e00 << 16) | (0xc900 >> 2),
247 0x00000000,
248 (0x6e00 << 16) | (0xc900 >> 2),
249 0x00000000,
250 (0x7e00 << 16) | (0xc900 >> 2),
251 0x00000000,
252 (0x8e00 << 16) | (0xc900 >> 2),
253 0x00000000,
254 (0x9e00 << 16) | (0xc900 >> 2),
255 0x00000000,
256 (0xae00 << 16) | (0xc900 >> 2),
257 0x00000000,
258 (0xbe00 << 16) | (0xc900 >> 2),
259 0x00000000,
260 (0x4e00 << 16) | (0xc904 >> 2),
261 0x00000000,
262 (0x5e00 << 16) | (0xc904 >> 2),
263 0x00000000,
264 (0x6e00 << 16) | (0xc904 >> 2),
265 0x00000000,
266 (0x7e00 << 16) | (0xc904 >> 2),
267 0x00000000,
268 (0x8e00 << 16) | (0xc904 >> 2),
269 0x00000000,
270 (0x9e00 << 16) | (0xc904 >> 2),
271 0x00000000,
272 (0xae00 << 16) | (0xc904 >> 2),
273 0x00000000,
274 (0xbe00 << 16) | (0xc904 >> 2),
275 0x00000000,
276 (0x4e00 << 16) | (0xc908 >> 2),
277 0x00000000,
278 (0x5e00 << 16) | (0xc908 >> 2),
279 0x00000000,
280 (0x6e00 << 16) | (0xc908 >> 2),
281 0x00000000,
282 (0x7e00 << 16) | (0xc908 >> 2),
283 0x00000000,
284 (0x8e00 << 16) | (0xc908 >> 2),
285 0x00000000,
286 (0x9e00 << 16) | (0xc908 >> 2),
287 0x00000000,
288 (0xae00 << 16) | (0xc908 >> 2),
289 0x00000000,
290 (0xbe00 << 16) | (0xc908 >> 2),
291 0x00000000,
292 (0x4e00 << 16) | (0xc90c >> 2),
293 0x00000000,
294 (0x5e00 << 16) | (0xc90c >> 2),
295 0x00000000,
296 (0x6e00 << 16) | (0xc90c >> 2),
297 0x00000000,
298 (0x7e00 << 16) | (0xc90c >> 2),
299 0x00000000,
300 (0x8e00 << 16) | (0xc90c >> 2),
301 0x00000000,
302 (0x9e00 << 16) | (0xc90c >> 2),
303 0x00000000,
304 (0xae00 << 16) | (0xc90c >> 2),
305 0x00000000,
306 (0xbe00 << 16) | (0xc90c >> 2),
307 0x00000000,
308 (0x4e00 << 16) | (0xc910 >> 2),
309 0x00000000,
310 (0x5e00 << 16) | (0xc910 >> 2),
311 0x00000000,
312 (0x6e00 << 16) | (0xc910 >> 2),
313 0x00000000,
314 (0x7e00 << 16) | (0xc910 >> 2),
315 0x00000000,
316 (0x8e00 << 16) | (0xc910 >> 2),
317 0x00000000,
318 (0x9e00 << 16) | (0xc910 >> 2),
319 0x00000000,
320 (0xae00 << 16) | (0xc910 >> 2),
321 0x00000000,
322 (0xbe00 << 16) | (0xc910 >> 2),
323 0x00000000,
324 (0x0e00 << 16) | (0xc99c >> 2),
325 0x00000000,
326 (0x0e00 << 16) | (0x9834 >> 2),
327 0x00000000,
328 (0x0000 << 16) | (0x30f00 >> 2),
329 0x00000000,
330 (0x0001 << 16) | (0x30f00 >> 2),
331 0x00000000,
332 (0x0000 << 16) | (0x30f04 >> 2),
333 0x00000000,
334 (0x0001 << 16) | (0x30f04 >> 2),
335 0x00000000,
336 (0x0000 << 16) | (0x30f08 >> 2),
337 0x00000000,
338 (0x0001 << 16) | (0x30f08 >> 2),
339 0x00000000,
340 (0x0000 << 16) | (0x30f0c >> 2),
341 0x00000000,
342 (0x0001 << 16) | (0x30f0c >> 2),
343 0x00000000,
344 (0x0600 << 16) | (0x9b7c >> 2),
345 0x00000000,
346 (0x0e00 << 16) | (0x8a14 >> 2),
347 0x00000000,
348 (0x0e00 << 16) | (0x8a18 >> 2),
349 0x00000000,
350 (0x0600 << 16) | (0x30a00 >> 2),
351 0x00000000,
352 (0x0e00 << 16) | (0x8bf0 >> 2),
353 0x00000000,
354 (0x0e00 << 16) | (0x8bcc >> 2),
355 0x00000000,
356 (0x0e00 << 16) | (0x8b24 >> 2),
357 0x00000000,
358 (0x0e00 << 16) | (0x30a04 >> 2),
359 0x00000000,
360 (0x0600 << 16) | (0x30a10 >> 2),
361 0x00000000,
362 (0x0600 << 16) | (0x30a14 >> 2),
363 0x00000000,
364 (0x0600 << 16) | (0x30a18 >> 2),
365 0x00000000,
366 (0x0600 << 16) | (0x30a2c >> 2),
367 0x00000000,
368 (0x0e00 << 16) | (0xc700 >> 2),
369 0x00000000,
370 (0x0e00 << 16) | (0xc704 >> 2),
371 0x00000000,
372 (0x0e00 << 16) | (0xc708 >> 2),
373 0x00000000,
374 (0x0e00 << 16) | (0xc768 >> 2),
375 0x00000000,
376 (0x0400 << 16) | (0xc770 >> 2),
377 0x00000000,
378 (0x0400 << 16) | (0xc774 >> 2),
379 0x00000000,
380 (0x0400 << 16) | (0xc778 >> 2),
381 0x00000000,
382 (0x0400 << 16) | (0xc77c >> 2),
383 0x00000000,
384 (0x0400 << 16) | (0xc780 >> 2),
385 0x00000000,
386 (0x0400 << 16) | (0xc784 >> 2),
387 0x00000000,
388 (0x0400 << 16) | (0xc788 >> 2),
389 0x00000000,
390 (0x0400 << 16) | (0xc78c >> 2),
391 0x00000000,
392 (0x0400 << 16) | (0xc798 >> 2),
393 0x00000000,
394 (0x0400 << 16) | (0xc79c >> 2),
395 0x00000000,
396 (0x0400 << 16) | (0xc7a0 >> 2),
397 0x00000000,
398 (0x0400 << 16) | (0xc7a4 >> 2),
399 0x00000000,
400 (0x0400 << 16) | (0xc7a8 >> 2),
401 0x00000000,
402 (0x0400 << 16) | (0xc7ac >> 2),
403 0x00000000,
404 (0x0400 << 16) | (0xc7b0 >> 2),
405 0x00000000,
406 (0x0400 << 16) | (0xc7b4 >> 2),
407 0x00000000,
408 (0x0e00 << 16) | (0x9100 >> 2),
409 0x00000000,
410 (0x0e00 << 16) | (0x3c010 >> 2),
411 0x00000000,
412 (0x0e00 << 16) | (0x92a8 >> 2),
413 0x00000000,
414 (0x0e00 << 16) | (0x92ac >> 2),
415 0x00000000,
416 (0x0e00 << 16) | (0x92b4 >> 2),
417 0x00000000,
418 (0x0e00 << 16) | (0x92b8 >> 2),
419 0x00000000,
420 (0x0e00 << 16) | (0x92bc >> 2),
421 0x00000000,
422 (0x0e00 << 16) | (0x92c0 >> 2),
423 0x00000000,
424 (0x0e00 << 16) | (0x92c4 >> 2),
425 0x00000000,
426 (0x0e00 << 16) | (0x92c8 >> 2),
427 0x00000000,
428 (0x0e00 << 16) | (0x92cc >> 2),
429 0x00000000,
430 (0x0e00 << 16) | (0x92d0 >> 2),
431 0x00000000,
432 (0x0e00 << 16) | (0x8c00 >> 2),
433 0x00000000,
434 (0x0e00 << 16) | (0x8c04 >> 2),
435 0x00000000,
436 (0x0e00 << 16) | (0x8c20 >> 2),
437 0x00000000,
438 (0x0e00 << 16) | (0x8c38 >> 2),
439 0x00000000,
440 (0x0e00 << 16) | (0x8c3c >> 2),
441 0x00000000,
442 (0x0e00 << 16) | (0xae00 >> 2),
443 0x00000000,
444 (0x0e00 << 16) | (0x9604 >> 2),
445 0x00000000,
446 (0x0e00 << 16) | (0xac08 >> 2),
447 0x00000000,
448 (0x0e00 << 16) | (0xac0c >> 2),
449 0x00000000,
450 (0x0e00 << 16) | (0xac10 >> 2),
451 0x00000000,
452 (0x0e00 << 16) | (0xac14 >> 2),
453 0x00000000,
454 (0x0e00 << 16) | (0xac58 >> 2),
455 0x00000000,
456 (0x0e00 << 16) | (0xac68 >> 2),
457 0x00000000,
458 (0x0e00 << 16) | (0xac6c >> 2),
459 0x00000000,
460 (0x0e00 << 16) | (0xac70 >> 2),
461 0x00000000,
462 (0x0e00 << 16) | (0xac74 >> 2),
463 0x00000000,
464 (0x0e00 << 16) | (0xac78 >> 2),
465 0x00000000,
466 (0x0e00 << 16) | (0xac7c >> 2),
467 0x00000000,
468 (0x0e00 << 16) | (0xac80 >> 2),
469 0x00000000,
470 (0x0e00 << 16) | (0xac84 >> 2),
471 0x00000000,
472 (0x0e00 << 16) | (0xac88 >> 2),
473 0x00000000,
474 (0x0e00 << 16) | (0xac8c >> 2),
475 0x00000000,
476 (0x0e00 << 16) | (0x970c >> 2),
477 0x00000000,
478 (0x0e00 << 16) | (0x9714 >> 2),
479 0x00000000,
480 (0x0e00 << 16) | (0x9718 >> 2),
481 0x00000000,
482 (0x0e00 << 16) | (0x971c >> 2),
483 0x00000000,
484 (0x0e00 << 16) | (0x31068 >> 2),
485 0x00000000,
486 (0x4e00 << 16) | (0x31068 >> 2),
487 0x00000000,
488 (0x5e00 << 16) | (0x31068 >> 2),
489 0x00000000,
490 (0x6e00 << 16) | (0x31068 >> 2),
491 0x00000000,
492 (0x7e00 << 16) | (0x31068 >> 2),
493 0x00000000,
494 (0x8e00 << 16) | (0x31068 >> 2),
495 0x00000000,
496 (0x9e00 << 16) | (0x31068 >> 2),
497 0x00000000,
498 (0xae00 << 16) | (0x31068 >> 2),
499 0x00000000,
500 (0xbe00 << 16) | (0x31068 >> 2),
501 0x00000000,
502 (0x0e00 << 16) | (0xcd10 >> 2),
503 0x00000000,
504 (0x0e00 << 16) | (0xcd14 >> 2),
505 0x00000000,
506 (0x0e00 << 16) | (0x88b0 >> 2),
507 0x00000000,
508 (0x0e00 << 16) | (0x88b4 >> 2),
509 0x00000000,
510 (0x0e00 << 16) | (0x88b8 >> 2),
511 0x00000000,
512 (0x0e00 << 16) | (0x88bc >> 2),
513 0x00000000,
514 (0x0400 << 16) | (0x89c0 >> 2),
515 0x00000000,
516 (0x0e00 << 16) | (0x88c4 >> 2),
517 0x00000000,
518 (0x0e00 << 16) | (0x88c8 >> 2),
519 0x00000000,
520 (0x0e00 << 16) | (0x88d0 >> 2),
521 0x00000000,
522 (0x0e00 << 16) | (0x88d4 >> 2),
523 0x00000000,
524 (0x0e00 << 16) | (0x88d8 >> 2),
525 0x00000000,
526 (0x0e00 << 16) | (0x8980 >> 2),
527 0x00000000,
528 (0x0e00 << 16) | (0x30938 >> 2),
529 0x00000000,
530 (0x0e00 << 16) | (0x3093c >> 2),
531 0x00000000,
532 (0x0e00 << 16) | (0x30940 >> 2),
533 0x00000000,
534 (0x0e00 << 16) | (0x89a0 >> 2),
535 0x00000000,
536 (0x0e00 << 16) | (0x30900 >> 2),
537 0x00000000,
538 (0x0e00 << 16) | (0x30904 >> 2),
539 0x00000000,
540 (0x0e00 << 16) | (0x89b4 >> 2),
541 0x00000000,
542 (0x0e00 << 16) | (0x3c210 >> 2),
543 0x00000000,
544 (0x0e00 << 16) | (0x3c214 >> 2),
545 0x00000000,
546 (0x0e00 << 16) | (0x3c218 >> 2),
547 0x00000000,
548 (0x0e00 << 16) | (0x8904 >> 2),
549 0x00000000,
550 0x5,
551 (0x0e00 << 16) | (0x8c28 >> 2),
552 (0x0e00 << 16) | (0x8c2c >> 2),
553 (0x0e00 << 16) | (0x8c30 >> 2),
554 (0x0e00 << 16) | (0x8c34 >> 2),
555 (0x0e00 << 16) | (0x9600 >> 2),
556};
557
558static const u32 kalindi_rlc_save_restore_register_list[] =
559{
560 (0x0e00 << 16) | (0xc12c >> 2),
561 0x00000000,
562 (0x0e00 << 16) | (0xc140 >> 2),
563 0x00000000,
564 (0x0e00 << 16) | (0xc150 >> 2),
565 0x00000000,
566 (0x0e00 << 16) | (0xc15c >> 2),
567 0x00000000,
568 (0x0e00 << 16) | (0xc168 >> 2),
569 0x00000000,
570 (0x0e00 << 16) | (0xc170 >> 2),
571 0x00000000,
572 (0x0e00 << 16) | (0xc204 >> 2),
573 0x00000000,
574 (0x0e00 << 16) | (0xc2b4 >> 2),
575 0x00000000,
576 (0x0e00 << 16) | (0xc2b8 >> 2),
577 0x00000000,
578 (0x0e00 << 16) | (0xc2bc >> 2),
579 0x00000000,
580 (0x0e00 << 16) | (0xc2c0 >> 2),
581 0x00000000,
582 (0x0e00 << 16) | (0x8228 >> 2),
583 0x00000000,
584 (0x0e00 << 16) | (0x829c >> 2),
585 0x00000000,
586 (0x0e00 << 16) | (0x869c >> 2),
587 0x00000000,
588 (0x0600 << 16) | (0x98f4 >> 2),
589 0x00000000,
590 (0x0e00 << 16) | (0x98f8 >> 2),
591 0x00000000,
592 (0x0e00 << 16) | (0x9900 >> 2),
593 0x00000000,
594 (0x0e00 << 16) | (0xc260 >> 2),
595 0x00000000,
596 (0x0e00 << 16) | (0x90e8 >> 2),
597 0x00000000,
598 (0x0e00 << 16) | (0x3c000 >> 2),
599 0x00000000,
600 (0x0e00 << 16) | (0x3c00c >> 2),
601 0x00000000,
602 (0x0e00 << 16) | (0x8c1c >> 2),
603 0x00000000,
604 (0x0e00 << 16) | (0x9700 >> 2),
605 0x00000000,
606 (0x0e00 << 16) | (0xcd20 >> 2),
607 0x00000000,
608 (0x4e00 << 16) | (0xcd20 >> 2),
609 0x00000000,
610 (0x5e00 << 16) | (0xcd20 >> 2),
611 0x00000000,
612 (0x6e00 << 16) | (0xcd20 >> 2),
613 0x00000000,
614 (0x7e00 << 16) | (0xcd20 >> 2),
615 0x00000000,
616 (0x0e00 << 16) | (0x89bc >> 2),
617 0x00000000,
618 (0x0e00 << 16) | (0x8900 >> 2),
619 0x00000000,
620 0x3,
621 (0x0e00 << 16) | (0xc130 >> 2),
622 0x00000000,
623 (0x0e00 << 16) | (0xc134 >> 2),
624 0x00000000,
625 (0x0e00 << 16) | (0xc1fc >> 2),
626 0x00000000,
627 (0x0e00 << 16) | (0xc208 >> 2),
628 0x00000000,
629 (0x0e00 << 16) | (0xc264 >> 2),
630 0x00000000,
631 (0x0e00 << 16) | (0xc268 >> 2),
632 0x00000000,
633 (0x0e00 << 16) | (0xc26c >> 2),
634 0x00000000,
635 (0x0e00 << 16) | (0xc270 >> 2),
636 0x00000000,
637 (0x0e00 << 16) | (0xc274 >> 2),
638 0x00000000,
639 (0x0e00 << 16) | (0xc28c >> 2),
640 0x00000000,
641 (0x0e00 << 16) | (0xc290 >> 2),
642 0x00000000,
643 (0x0e00 << 16) | (0xc294 >> 2),
644 0x00000000,
645 (0x0e00 << 16) | (0xc298 >> 2),
646 0x00000000,
647 (0x0e00 << 16) | (0xc2a0 >> 2),
648 0x00000000,
649 (0x0e00 << 16) | (0xc2a4 >> 2),
650 0x00000000,
651 (0x0e00 << 16) | (0xc2a8 >> 2),
652 0x00000000,
653 (0x0e00 << 16) | (0xc2ac >> 2),
654 0x00000000,
655 (0x0e00 << 16) | (0x301d0 >> 2),
656 0x00000000,
657 (0x0e00 << 16) | (0x30238 >> 2),
658 0x00000000,
659 (0x0e00 << 16) | (0x30250 >> 2),
660 0x00000000,
661 (0x0e00 << 16) | (0x30254 >> 2),
662 0x00000000,
663 (0x0e00 << 16) | (0x30258 >> 2),
664 0x00000000,
665 (0x0e00 << 16) | (0x3025c >> 2),
666 0x00000000,
667 (0x4e00 << 16) | (0xc900 >> 2),
668 0x00000000,
669 (0x5e00 << 16) | (0xc900 >> 2),
670 0x00000000,
671 (0x6e00 << 16) | (0xc900 >> 2),
672 0x00000000,
673 (0x7e00 << 16) | (0xc900 >> 2),
674 0x00000000,
675 (0x4e00 << 16) | (0xc904 >> 2),
676 0x00000000,
677 (0x5e00 << 16) | (0xc904 >> 2),
678 0x00000000,
679 (0x6e00 << 16) | (0xc904 >> 2),
680 0x00000000,
681 (0x7e00 << 16) | (0xc904 >> 2),
682 0x00000000,
683 (0x4e00 << 16) | (0xc908 >> 2),
684 0x00000000,
685 (0x5e00 << 16) | (0xc908 >> 2),
686 0x00000000,
687 (0x6e00 << 16) | (0xc908 >> 2),
688 0x00000000,
689 (0x7e00 << 16) | (0xc908 >> 2),
690 0x00000000,
691 (0x4e00 << 16) | (0xc90c >> 2),
692 0x00000000,
693 (0x5e00 << 16) | (0xc90c >> 2),
694 0x00000000,
695 (0x6e00 << 16) | (0xc90c >> 2),
696 0x00000000,
697 (0x7e00 << 16) | (0xc90c >> 2),
698 0x00000000,
699 (0x4e00 << 16) | (0xc910 >> 2),
700 0x00000000,
701 (0x5e00 << 16) | (0xc910 >> 2),
702 0x00000000,
703 (0x6e00 << 16) | (0xc910 >> 2),
704 0x00000000,
705 (0x7e00 << 16) | (0xc910 >> 2),
706 0x00000000,
707 (0x0e00 << 16) | (0xc99c >> 2),
708 0x00000000,
709 (0x0e00 << 16) | (0x9834 >> 2),
710 0x00000000,
711 (0x0000 << 16) | (0x30f00 >> 2),
712 0x00000000,
713 (0x0000 << 16) | (0x30f04 >> 2),
714 0x00000000,
715 (0x0000 << 16) | (0x30f08 >> 2),
716 0x00000000,
717 (0x0000 << 16) | (0x30f0c >> 2),
718 0x00000000,
719 (0x0600 << 16) | (0x9b7c >> 2),
720 0x00000000,
721 (0x0e00 << 16) | (0x8a14 >> 2),
722 0x00000000,
723 (0x0e00 << 16) | (0x8a18 >> 2),
724 0x00000000,
725 (0x0600 << 16) | (0x30a00 >> 2),
726 0x00000000,
727 (0x0e00 << 16) | (0x8bf0 >> 2),
728 0x00000000,
729 (0x0e00 << 16) | (0x8bcc >> 2),
730 0x00000000,
731 (0x0e00 << 16) | (0x8b24 >> 2),
732 0x00000000,
733 (0x0e00 << 16) | (0x30a04 >> 2),
734 0x00000000,
735 (0x0600 << 16) | (0x30a10 >> 2),
736 0x00000000,
737 (0x0600 << 16) | (0x30a14 >> 2),
738 0x00000000,
739 (0x0600 << 16) | (0x30a18 >> 2),
740 0x00000000,
741 (0x0600 << 16) | (0x30a2c >> 2),
742 0x00000000,
743 (0x0e00 << 16) | (0xc700 >> 2),
744 0x00000000,
745 (0x0e00 << 16) | (0xc704 >> 2),
746 0x00000000,
747 (0x0e00 << 16) | (0xc708 >> 2),
748 0x00000000,
749 (0x0e00 << 16) | (0xc768 >> 2),
750 0x00000000,
751 (0x0400 << 16) | (0xc770 >> 2),
752 0x00000000,
753 (0x0400 << 16) | (0xc774 >> 2),
754 0x00000000,
755 (0x0400 << 16) | (0xc798 >> 2),
756 0x00000000,
757 (0x0400 << 16) | (0xc79c >> 2),
758 0x00000000,
759 (0x0e00 << 16) | (0x9100 >> 2),
760 0x00000000,
761 (0x0e00 << 16) | (0x3c010 >> 2),
762 0x00000000,
763 (0x0e00 << 16) | (0x8c00 >> 2),
764 0x00000000,
765 (0x0e00 << 16) | (0x8c04 >> 2),
766 0x00000000,
767 (0x0e00 << 16) | (0x8c20 >> 2),
768 0x00000000,
769 (0x0e00 << 16) | (0x8c38 >> 2),
770 0x00000000,
771 (0x0e00 << 16) | (0x8c3c >> 2),
772 0x00000000,
773 (0x0e00 << 16) | (0xae00 >> 2),
774 0x00000000,
775 (0x0e00 << 16) | (0x9604 >> 2),
776 0x00000000,
777 (0x0e00 << 16) | (0xac08 >> 2),
778 0x00000000,
779 (0x0e00 << 16) | (0xac0c >> 2),
780 0x00000000,
781 (0x0e00 << 16) | (0xac10 >> 2),
782 0x00000000,
783 (0x0e00 << 16) | (0xac14 >> 2),
784 0x00000000,
785 (0x0e00 << 16) | (0xac58 >> 2),
786 0x00000000,
787 (0x0e00 << 16) | (0xac68 >> 2),
788 0x00000000,
789 (0x0e00 << 16) | (0xac6c >> 2),
790 0x00000000,
791 (0x0e00 << 16) | (0xac70 >> 2),
792 0x00000000,
793 (0x0e00 << 16) | (0xac74 >> 2),
794 0x00000000,
795 (0x0e00 << 16) | (0xac78 >> 2),
796 0x00000000,
797 (0x0e00 << 16) | (0xac7c >> 2),
798 0x00000000,
799 (0x0e00 << 16) | (0xac80 >> 2),
800 0x00000000,
801 (0x0e00 << 16) | (0xac84 >> 2),
802 0x00000000,
803 (0x0e00 << 16) | (0xac88 >> 2),
804 0x00000000,
805 (0x0e00 << 16) | (0xac8c >> 2),
806 0x00000000,
807 (0x0e00 << 16) | (0x970c >> 2),
808 0x00000000,
809 (0x0e00 << 16) | (0x9714 >> 2),
810 0x00000000,
811 (0x0e00 << 16) | (0x9718 >> 2),
812 0x00000000,
813 (0x0e00 << 16) | (0x971c >> 2),
814 0x00000000,
815 (0x0e00 << 16) | (0x31068 >> 2),
816 0x00000000,
817 (0x4e00 << 16) | (0x31068 >> 2),
818 0x00000000,
819 (0x5e00 << 16) | (0x31068 >> 2),
820 0x00000000,
821 (0x6e00 << 16) | (0x31068 >> 2),
822 0x00000000,
823 (0x7e00 << 16) | (0x31068 >> 2),
824 0x00000000,
825 (0x0e00 << 16) | (0xcd10 >> 2),
826 0x00000000,
827 (0x0e00 << 16) | (0xcd14 >> 2),
828 0x00000000,
829 (0x0e00 << 16) | (0x88b0 >> 2),
830 0x00000000,
831 (0x0e00 << 16) | (0x88b4 >> 2),
832 0x00000000,
833 (0x0e00 << 16) | (0x88b8 >> 2),
834 0x00000000,
835 (0x0e00 << 16) | (0x88bc >> 2),
836 0x00000000,
837 (0x0400 << 16) | (0x89c0 >> 2),
838 0x00000000,
839 (0x0e00 << 16) | (0x88c4 >> 2),
840 0x00000000,
841 (0x0e00 << 16) | (0x88c8 >> 2),
842 0x00000000,
843 (0x0e00 << 16) | (0x88d0 >> 2),
844 0x00000000,
845 (0x0e00 << 16) | (0x88d4 >> 2),
846 0x00000000,
847 (0x0e00 << 16) | (0x88d8 >> 2),
848 0x00000000,
849 (0x0e00 << 16) | (0x8980 >> 2),
850 0x00000000,
851 (0x0e00 << 16) | (0x30938 >> 2),
852 0x00000000,
853 (0x0e00 << 16) | (0x3093c >> 2),
854 0x00000000,
855 (0x0e00 << 16) | (0x30940 >> 2),
856 0x00000000,
857 (0x0e00 << 16) | (0x89a0 >> 2),
858 0x00000000,
859 (0x0e00 << 16) | (0x30900 >> 2),
860 0x00000000,
861 (0x0e00 << 16) | (0x30904 >> 2),
862 0x00000000,
863 (0x0e00 << 16) | (0x89b4 >> 2),
864 0x00000000,
865 (0x0e00 << 16) | (0x3e1fc >> 2),
866 0x00000000,
867 (0x0e00 << 16) | (0x3c210 >> 2),
868 0x00000000,
869 (0x0e00 << 16) | (0x3c214 >> 2),
870 0x00000000,
871 (0x0e00 << 16) | (0x3c218 >> 2),
872 0x00000000,
873 (0x0e00 << 16) | (0x8904 >> 2),
874 0x00000000,
875 0x5,
876 (0x0e00 << 16) | (0x8c28 >> 2),
877 (0x0e00 << 16) | (0x8c2c >> 2),
878 (0x0e00 << 16) | (0x8c30 >> 2),
879 (0x0e00 << 16) | (0x8c34 >> 2),
880 (0x0e00 << 16) | (0x9600 >> 2),
881};
882
883static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
884static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
885static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
886static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
887static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
888
889
890
891
892
893
894
895
896
897
898
899
900
901static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
902{
903 const char *chip_name;
904 char fw_name[30];
905 int err;
906
907 DRM_DEBUG("\n");
908
909 switch (adev->asic_type) {
910 case CHIP_BONAIRE:
911 chip_name = "bonaire";
912 break;
913 case CHIP_HAWAII:
914 chip_name = "hawaii";
915 break;
916 case CHIP_KAVERI:
917 chip_name = "kaveri";
918 break;
919 case CHIP_KABINI:
920 chip_name = "kabini";
921 break;
922 case CHIP_MULLINS:
923 chip_name = "mullins";
924 break;
925 default: BUG();
926 }
927
928 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
929 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
930 if (err)
931 goto out;
932 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
933 if (err)
934 goto out;
935
936 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
937 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
938 if (err)
939 goto out;
940 err = amdgpu_ucode_validate(adev->gfx.me_fw);
941 if (err)
942 goto out;
943
944 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
945 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
946 if (err)
947 goto out;
948 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
949 if (err)
950 goto out;
951
952 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
953 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
954 if (err)
955 goto out;
956 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
957 if (err)
958 goto out;
959
960 if (adev->asic_type == CHIP_KAVERI) {
961 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
962 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
963 if (err)
964 goto out;
965 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
966 if (err)
967 goto out;
968 }
969
970 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
971 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
972 if (err)
973 goto out;
974 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
975
976out:
977 if (err) {
978 pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
979 release_firmware(adev->gfx.pfp_fw);
980 adev->gfx.pfp_fw = NULL;
981 release_firmware(adev->gfx.me_fw);
982 adev->gfx.me_fw = NULL;
983 release_firmware(adev->gfx.ce_fw);
984 adev->gfx.ce_fw = NULL;
985 release_firmware(adev->gfx.mec_fw);
986 adev->gfx.mec_fw = NULL;
987 release_firmware(adev->gfx.mec2_fw);
988 adev->gfx.mec2_fw = NULL;
989 release_firmware(adev->gfx.rlc_fw);
990 adev->gfx.rlc_fw = NULL;
991 }
992 return err;
993}
994
995static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
996{
997 release_firmware(adev->gfx.pfp_fw);
998 adev->gfx.pfp_fw = NULL;
999 release_firmware(adev->gfx.me_fw);
1000 adev->gfx.me_fw = NULL;
1001 release_firmware(adev->gfx.ce_fw);
1002 adev->gfx.ce_fw = NULL;
1003 release_firmware(adev->gfx.mec_fw);
1004 adev->gfx.mec_fw = NULL;
1005 release_firmware(adev->gfx.mec2_fw);
1006 adev->gfx.mec2_fw = NULL;
1007 release_firmware(adev->gfx.rlc_fw);
1008 adev->gfx.rlc_fw = NULL;
1009}
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1023{
1024 const u32 num_tile_mode_states =
1025 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1026 const u32 num_secondary_tile_mode_states =
1027 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1028 u32 reg_offset, split_equal_to_row_size;
1029 uint32_t *tile, *macrotile;
1030
1031 tile = adev->gfx.config.tile_mode_array;
1032 macrotile = adev->gfx.config.macrotile_mode_array;
1033
1034 switch (adev->gfx.config.mem_row_size_in_kb) {
1035 case 1:
1036 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1037 break;
1038 case 2:
1039 default:
1040 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1041 break;
1042 case 4:
1043 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1044 break;
1045 }
1046
1047 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1048 tile[reg_offset] = 0;
1049 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1050 macrotile[reg_offset] = 0;
1051
1052 switch (adev->asic_type) {
1053 case CHIP_BONAIRE:
1054 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1055 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1056 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1057 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1058 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1059 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1060 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1061 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1062 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1063 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1064 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1065 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1066 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1067 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1068 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1069 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1070 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1071 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1073 TILE_SPLIT(split_equal_to_row_size));
1074 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1075 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1076 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1077 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1078 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1079 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1080 TILE_SPLIT(split_equal_to_row_size));
1081 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1082 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1083 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1084 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1085 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1086 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1087 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1088 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1089 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1091 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1092 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1093 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1095 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1096 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1097 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1098 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1099 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1100 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1101 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1103 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1104 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1105 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1107 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1108 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1109 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1111 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1112 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1113 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1114 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1115 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1116 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1117 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1118 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1119 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1120 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1121 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1123 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1124 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1125 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1127 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1128 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1129 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1131 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1132 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1133 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1134 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1136 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1137 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1138 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1140 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1141 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1142 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1143 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1144 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1145 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1146 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1147 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1148 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1149 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1151 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1152 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1153 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1155 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1156
1157 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1158 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1159 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1160 NUM_BANKS(ADDR_SURF_16_BANK));
1161 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1164 NUM_BANKS(ADDR_SURF_16_BANK));
1165 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1168 NUM_BANKS(ADDR_SURF_16_BANK));
1169 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1170 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1171 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1172 NUM_BANKS(ADDR_SURF_16_BANK));
1173 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1176 NUM_BANKS(ADDR_SURF_16_BANK));
1177 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1178 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1179 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1180 NUM_BANKS(ADDR_SURF_8_BANK));
1181 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1182 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1183 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1184 NUM_BANKS(ADDR_SURF_4_BANK));
1185 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1188 NUM_BANKS(ADDR_SURF_16_BANK));
1189 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1192 NUM_BANKS(ADDR_SURF_16_BANK));
1193 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1194 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1195 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1196 NUM_BANKS(ADDR_SURF_16_BANK));
1197 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1198 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1199 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1200 NUM_BANKS(ADDR_SURF_16_BANK));
1201 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1202 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1203 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1204 NUM_BANKS(ADDR_SURF_16_BANK));
1205 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1208 NUM_BANKS(ADDR_SURF_8_BANK));
1209 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1212 NUM_BANKS(ADDR_SURF_4_BANK));
1213
1214 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1215 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1216 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1217 if (reg_offset != 7)
1218 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1219 break;
1220 case CHIP_HAWAII:
1221 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1222 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1225 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1226 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1229 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1230 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1231 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1232 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1233 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1234 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1237 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1238 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1240 TILE_SPLIT(split_equal_to_row_size));
1241 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1242 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1244 TILE_SPLIT(split_equal_to_row_size));
1245 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1246 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1248 TILE_SPLIT(split_equal_to_row_size));
1249 tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1250 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1252 TILE_SPLIT(split_equal_to_row_size));
1253 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1254 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1255 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1256 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1257 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1258 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1259 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1260 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1262 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1263 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1266 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1267 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1268 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1270 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1271 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1273 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1274 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1275 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1277 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1278 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1279 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1281 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1282 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1283 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1285 tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1286 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1287 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1289 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1290 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1291 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1293 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1294 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1295 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1296 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1297 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1298 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1300 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1301 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1302 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1304 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1305 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1306 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1308 tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1309 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1310 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1312 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1313 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1314 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1316 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1317 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1318 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1320 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1321 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1322 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1324 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1325 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1326 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1327 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1329 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1331 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1333 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1335 tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1336 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1337 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1339
1340 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1343 NUM_BANKS(ADDR_SURF_16_BANK));
1344 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1347 NUM_BANKS(ADDR_SURF_16_BANK));
1348 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1351 NUM_BANKS(ADDR_SURF_16_BANK));
1352 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1355 NUM_BANKS(ADDR_SURF_16_BANK));
1356 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1359 NUM_BANKS(ADDR_SURF_8_BANK));
1360 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1363 NUM_BANKS(ADDR_SURF_4_BANK));
1364 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1367 NUM_BANKS(ADDR_SURF_4_BANK));
1368 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1371 NUM_BANKS(ADDR_SURF_16_BANK));
1372 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1375 NUM_BANKS(ADDR_SURF_16_BANK));
1376 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1379 NUM_BANKS(ADDR_SURF_16_BANK));
1380 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1383 NUM_BANKS(ADDR_SURF_8_BANK));
1384 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1387 NUM_BANKS(ADDR_SURF_16_BANK));
1388 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1391 NUM_BANKS(ADDR_SURF_8_BANK));
1392 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1395 NUM_BANKS(ADDR_SURF_4_BANK));
1396
1397 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1398 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1399 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1400 if (reg_offset != 7)
1401 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1402 break;
1403 case CHIP_KABINI:
1404 case CHIP_KAVERI:
1405 case CHIP_MULLINS:
1406 default:
1407 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1408 PIPE_CONFIG(ADDR_SURF_P2) |
1409 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1410 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1411 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1412 PIPE_CONFIG(ADDR_SURF_P2) |
1413 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1414 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1415 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1416 PIPE_CONFIG(ADDR_SURF_P2) |
1417 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1418 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1419 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1420 PIPE_CONFIG(ADDR_SURF_P2) |
1421 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1422 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1423 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1424 PIPE_CONFIG(ADDR_SURF_P2) |
1425 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1426 TILE_SPLIT(split_equal_to_row_size));
1427 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1428 PIPE_CONFIG(ADDR_SURF_P2) |
1429 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1430 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1431 PIPE_CONFIG(ADDR_SURF_P2) |
1432 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1433 TILE_SPLIT(split_equal_to_row_size));
1434 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1435 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1436 PIPE_CONFIG(ADDR_SURF_P2));
1437 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1438 PIPE_CONFIG(ADDR_SURF_P2) |
1439 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1440 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1441 PIPE_CONFIG(ADDR_SURF_P2) |
1442 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1444 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1445 PIPE_CONFIG(ADDR_SURF_P2) |
1446 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1448 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1449 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1450 PIPE_CONFIG(ADDR_SURF_P2) |
1451 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1452 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1453 PIPE_CONFIG(ADDR_SURF_P2) |
1454 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1456 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1457 PIPE_CONFIG(ADDR_SURF_P2) |
1458 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1460 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1461 PIPE_CONFIG(ADDR_SURF_P2) |
1462 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1464 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1465 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1466 PIPE_CONFIG(ADDR_SURF_P2) |
1467 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1469 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1470 PIPE_CONFIG(ADDR_SURF_P2) |
1471 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1472 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1473 PIPE_CONFIG(ADDR_SURF_P2) |
1474 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1476 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1477 PIPE_CONFIG(ADDR_SURF_P2) |
1478 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1480 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1481 PIPE_CONFIG(ADDR_SURF_P2) |
1482 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1484 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1485 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1486 PIPE_CONFIG(ADDR_SURF_P2) |
1487 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1488 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1489 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1490 PIPE_CONFIG(ADDR_SURF_P2) |
1491 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1492 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1493 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1494 PIPE_CONFIG(ADDR_SURF_P2) |
1495 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1496 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1497 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1498 PIPE_CONFIG(ADDR_SURF_P2) |
1499 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1500 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1501 PIPE_CONFIG(ADDR_SURF_P2) |
1502 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1504 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1505 PIPE_CONFIG(ADDR_SURF_P2) |
1506 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1508 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1509
1510 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1513 NUM_BANKS(ADDR_SURF_8_BANK));
1514 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1517 NUM_BANKS(ADDR_SURF_8_BANK));
1518 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1521 NUM_BANKS(ADDR_SURF_8_BANK));
1522 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1525 NUM_BANKS(ADDR_SURF_8_BANK));
1526 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1529 NUM_BANKS(ADDR_SURF_8_BANK));
1530 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1533 NUM_BANKS(ADDR_SURF_8_BANK));
1534 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1537 NUM_BANKS(ADDR_SURF_8_BANK));
1538 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1541 NUM_BANKS(ADDR_SURF_16_BANK));
1542 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1543 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1544 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1545 NUM_BANKS(ADDR_SURF_16_BANK));
1546 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1547 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1548 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1549 NUM_BANKS(ADDR_SURF_16_BANK));
1550 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1553 NUM_BANKS(ADDR_SURF_16_BANK));
1554 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1555 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1556 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1557 NUM_BANKS(ADDR_SURF_16_BANK));
1558 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1561 NUM_BANKS(ADDR_SURF_16_BANK));
1562 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1565 NUM_BANKS(ADDR_SURF_8_BANK));
1566
1567 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1568 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1569 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1570 if (reg_offset != 7)
1571 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1572 break;
1573 }
1574}
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
1588 u32 se_num, u32 sh_num, u32 instance)
1589{
1590 u32 data;
1591
1592 if (instance == 0xffffffff)
1593 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1594 else
1595 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1596
1597 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1598 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1599 GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1600 else if (se_num == 0xffffffff)
1601 data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1602 (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1603 else if (sh_num == 0xffffffff)
1604 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1605 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1606 else
1607 data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1608 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1609 WREG32(mmGRBM_GFX_INDEX, data);
1610}
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1621{
1622 u32 data, mask;
1623
1624 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1625 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1626
1627 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1628 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1629
1630 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1631 adev->gfx.config.max_sh_per_se);
1632
1633 return (~data) & mask;
1634}
1635
1636static void
1637gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
1638{
1639 switch (adev->asic_type) {
1640 case CHIP_BONAIRE:
1641 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
1642 SE_XSEL(1) | SE_YSEL(1);
1643 *rconf1 |= 0x0;
1644 break;
1645 case CHIP_HAWAII:
1646 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
1647 RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
1648 PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
1649 SE_YSEL(3);
1650 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
1651 SE_PAIR_YSEL(2);
1652 break;
1653 case CHIP_KAVERI:
1654 *rconf |= RB_MAP_PKR0(2);
1655 *rconf1 |= 0x0;
1656 break;
1657 case CHIP_KABINI:
1658 case CHIP_MULLINS:
1659 *rconf |= 0x0;
1660 *rconf1 |= 0x0;
1661 break;
1662 default:
1663 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
1664 break;
1665 }
1666}
1667
1668static void
1669gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1670 u32 raster_config, u32 raster_config_1,
1671 unsigned rb_mask, unsigned num_rb)
1672{
1673 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
1674 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
1675 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
1676 unsigned rb_per_se = num_rb / num_se;
1677 unsigned se_mask[4];
1678 unsigned se;
1679
1680 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
1681 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
1682 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
1683 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
1684
1685 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
1686 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
1687 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
1688
1689 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
1690 (!se_mask[2] && !se_mask[3]))) {
1691 raster_config_1 &= ~SE_PAIR_MAP_MASK;
1692
1693 if (!se_mask[0] && !se_mask[1]) {
1694 raster_config_1 |=
1695 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
1696 } else {
1697 raster_config_1 |=
1698 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
1699 }
1700 }
1701
1702 for (se = 0; se < num_se; se++) {
1703 unsigned raster_config_se = raster_config;
1704 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
1705 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
1706 int idx = (se / 2) * 2;
1707
1708 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
1709 raster_config_se &= ~SE_MAP_MASK;
1710
1711 if (!se_mask[idx]) {
1712 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
1713 } else {
1714 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
1715 }
1716 }
1717
1718 pkr0_mask &= rb_mask;
1719 pkr1_mask &= rb_mask;
1720 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
1721 raster_config_se &= ~PKR_MAP_MASK;
1722
1723 if (!pkr0_mask) {
1724 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
1725 } else {
1726 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
1727 }
1728 }
1729
1730 if (rb_per_se >= 2) {
1731 unsigned rb0_mask = 1 << (se * rb_per_se);
1732 unsigned rb1_mask = rb0_mask << 1;
1733
1734 rb0_mask &= rb_mask;
1735 rb1_mask &= rb_mask;
1736 if (!rb0_mask || !rb1_mask) {
1737 raster_config_se &= ~RB_MAP_PKR0_MASK;
1738
1739 if (!rb0_mask) {
1740 raster_config_se |=
1741 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
1742 } else {
1743 raster_config_se |=
1744 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
1745 }
1746 }
1747
1748 if (rb_per_se > 2) {
1749 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
1750 rb1_mask = rb0_mask << 1;
1751 rb0_mask &= rb_mask;
1752 rb1_mask &= rb_mask;
1753 if (!rb0_mask || !rb1_mask) {
1754 raster_config_se &= ~RB_MAP_PKR1_MASK;
1755
1756 if (!rb0_mask) {
1757 raster_config_se |=
1758 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
1759 } else {
1760 raster_config_se |=
1761 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
1762 }
1763 }
1764 }
1765 }
1766
1767
1768 gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
1769 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
1770 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1771 }
1772
1773
1774 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1775}
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
1787{
1788 int i, j;
1789 u32 data;
1790 u32 raster_config = 0, raster_config_1 = 0;
1791 u32 active_rbs = 0;
1792 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1793 adev->gfx.config.max_sh_per_se;
1794 unsigned num_rb_pipes;
1795
1796 mutex_lock(&adev->grbm_idx_mutex);
1797 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1798 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1799 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1800 data = gfx_v7_0_get_rb_active_bitmap(adev);
1801 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1802 rb_bitmap_width_per_sh);
1803 }
1804 }
1805 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1806
1807 adev->gfx.config.backend_enable_mask = active_rbs;
1808 adev->gfx.config.num_rbs = hweight32(active_rbs);
1809
1810 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
1811 adev->gfx.config.max_shader_engines, 16);
1812
1813 gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
1814
1815 if (!adev->gfx.config.backend_enable_mask ||
1816 adev->gfx.config.num_rbs >= num_rb_pipes) {
1817 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
1818 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1819 } else {
1820 gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
1821 adev->gfx.config.backend_enable_mask,
1822 num_rb_pipes);
1823 }
1824
1825
1826 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1827 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1828 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1829 adev->gfx.config.rb_config[i][j].rb_backend_disable =
1830 RREG32(mmCC_RB_BACKEND_DISABLE);
1831 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
1832 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1833 adev->gfx.config.rb_config[i][j].raster_config =
1834 RREG32(mmPA_SC_RASTER_CONFIG);
1835 adev->gfx.config.rb_config[i][j].raster_config_1 =
1836 RREG32(mmPA_SC_RASTER_CONFIG_1);
1837 }
1838 }
1839 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1840 mutex_unlock(&adev->grbm_idx_mutex);
1841}
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851#define DEFAULT_SH_MEM_BASES (0x6000)
1852#define FIRST_COMPUTE_VMID (8)
1853#define LAST_COMPUTE_VMID (16)
1854static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
1855{
1856 int i;
1857 uint32_t sh_mem_config;
1858 uint32_t sh_mem_bases;
1859
1860
1861
1862
1863
1864
1865
1866 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1867 sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1868 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1869 sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
1870 mutex_lock(&adev->srbm_mutex);
1871 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1872 cik_srbm_select(adev, 0, 0, 0, i);
1873
1874 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
1875 WREG32(mmSH_MEM_APE1_BASE, 1);
1876 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1877 WREG32(mmSH_MEM_BASES, sh_mem_bases);
1878 }
1879 cik_srbm_select(adev, 0, 0, 0, 0);
1880 mutex_unlock(&adev->srbm_mutex);
1881}
1882
1883static void gfx_v7_0_config_init(struct amdgpu_device *adev)
1884{
1885 adev->gfx.config.double_offchip_lds_buf = 1;
1886}
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
1897{
1898 u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
1899 u32 tmp;
1900 int i;
1901
1902 WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
1903
1904 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
1905 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
1906 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
1907
1908 gfx_v7_0_tiling_mode_table_init(adev);
1909
1910 gfx_v7_0_setup_rb(adev);
1911 gfx_v7_0_get_cu_info(adev);
1912 gfx_v7_0_config_init(adev);
1913
1914
1915 WREG32(mmCP_MEQ_THRESHOLDS,
1916 (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
1917 (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
1918
1919 mutex_lock(&adev->grbm_idx_mutex);
1920
1921
1922
1923
1924 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1925
1926
1927
1928 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1929 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1930 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
1931 MTYPE_NC);
1932 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
1933 MTYPE_UC);
1934 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);
1935
1936 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
1937 SWIZZLE_ENABLE, 1);
1938 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1939 ELEMENT_SIZE, 1);
1940 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1941 INDEX_STRIDE, 3);
1942 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
1943
1944 mutex_lock(&adev->srbm_mutex);
1945 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
1946 if (i == 0)
1947 sh_mem_base = 0;
1948 else
1949 sh_mem_base = adev->gmc.shared_aperture_start >> 48;
1950 cik_srbm_select(adev, 0, 0, 0, i);
1951
1952 WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
1953 WREG32(mmSH_MEM_APE1_BASE, 1);
1954 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1955 WREG32(mmSH_MEM_BASES, sh_mem_base);
1956 }
1957 cik_srbm_select(adev, 0, 0, 0, 0);
1958 mutex_unlock(&adev->srbm_mutex);
1959
1960 gfx_v7_0_init_compute_vmid(adev);
1961
1962 WREG32(mmSX_DEBUG_1, 0x20);
1963
1964 WREG32(mmTA_CNTL_AUX, 0x00010000);
1965
1966 tmp = RREG32(mmSPI_CONFIG_CNTL);
1967 tmp |= 0x03000000;
1968 WREG32(mmSPI_CONFIG_CNTL, tmp);
1969
1970 WREG32(mmSQ_CONFIG, 1);
1971
1972 WREG32(mmDB_DEBUG, 0);
1973
1974 tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
1975 tmp |= 0x00000400;
1976 WREG32(mmDB_DEBUG2, tmp);
1977
1978 tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
1979 tmp |= 0x00020200;
1980 WREG32(mmDB_DEBUG3, tmp);
1981
1982 tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
1983 tmp |= 0x00018208;
1984 WREG32(mmCB_HW_CONTROL, tmp);
1985
1986 WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
1987
1988 WREG32(mmPA_SC_FIFO_SIZE,
1989 ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1990 (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1991 (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1992 (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
1993
1994 WREG32(mmVGT_NUM_INSTANCES, 1);
1995
1996 WREG32(mmCP_PERFMON_CNTL, 0);
1997
1998 WREG32(mmSQ_CONFIG, 0);
1999
2000 WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
2001 ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
2002 (255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
2003
2004 WREG32(mmVGT_CACHE_INVALIDATION,
2005 (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
2006 (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
2007
2008 WREG32(mmVGT_GS_VERTEX_REUSE, 16);
2009 WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
2010
2011 WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
2012 (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
2013 WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
2014
2015 tmp = RREG32(mmSPI_ARB_PRIORITY);
2016 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
2017 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
2018 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
2019 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
2020 WREG32(mmSPI_ARB_PRIORITY, tmp);
2021
2022 mutex_unlock(&adev->grbm_idx_mutex);
2023
2024 udelay(50);
2025}
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
2041{
2042 adev->gfx.scratch.num_reg = 8;
2043 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
2044 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
2045}
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2059{
2060 struct amdgpu_device *adev = ring->adev;
2061 uint32_t scratch;
2062 uint32_t tmp = 0;
2063 unsigned i;
2064 int r;
2065
2066 r = amdgpu_gfx_scratch_get(adev, &scratch);
2067 if (r) {
2068 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
2069 return r;
2070 }
2071 WREG32(scratch, 0xCAFEDEAD);
2072 r = amdgpu_ring_alloc(ring, 3);
2073 if (r) {
2074 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
2075 amdgpu_gfx_scratch_free(adev, scratch);
2076 return r;
2077 }
2078 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2079 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2080 amdgpu_ring_write(ring, 0xDEADBEEF);
2081 amdgpu_ring_commit(ring);
2082
2083 for (i = 0; i < adev->usec_timeout; i++) {
2084 tmp = RREG32(scratch);
2085 if (tmp == 0xDEADBEEF)
2086 break;
2087 DRM_UDELAY(1);
2088 }
2089 if (i < adev->usec_timeout) {
2090 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2091 } else {
2092 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2093 ring->idx, scratch, tmp);
2094 r = -EINVAL;
2095 }
2096 amdgpu_gfx_scratch_free(adev, scratch);
2097 return r;
2098}
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
2109{
2110 u32 ref_and_mask;
2111 int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
2112
2113 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
2114 switch (ring->me) {
2115 case 1:
2116 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
2117 break;
2118 case 2:
2119 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
2120 break;
2121 default:
2122 return;
2123 }
2124 } else {
2125 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
2126 }
2127
2128 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
2129 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) |
2130 WAIT_REG_MEM_FUNCTION(3) |
2131 WAIT_REG_MEM_ENGINE(usepfp)));
2132 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
2133 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
2134 amdgpu_ring_write(ring, ref_and_mask);
2135 amdgpu_ring_write(ring, ref_and_mask);
2136 amdgpu_ring_write(ring, 0x20);
2137}
2138
2139static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
2140{
2141 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2142 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
2143 EVENT_INDEX(4));
2144
2145 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2146 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
2147 EVENT_INDEX(0));
2148}
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
2160 u64 seq, unsigned flags)
2161{
2162 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2163 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2164
2165
2166
2167 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2168 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2169 EOP_TC_ACTION_EN |
2170 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2171 EVENT_INDEX(5)));
2172 amdgpu_ring_write(ring, addr & 0xfffffffc);
2173 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2174 DATA_SEL(1) | INT_SEL(0));
2175 amdgpu_ring_write(ring, lower_32_bits(seq - 1));
2176 amdgpu_ring_write(ring, upper_32_bits(seq - 1));
2177
2178
2179 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2180 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2181 EOP_TC_ACTION_EN |
2182 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2183 EVENT_INDEX(5)));
2184 amdgpu_ring_write(ring, addr & 0xfffffffc);
2185 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2186 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2187 amdgpu_ring_write(ring, lower_32_bits(seq));
2188 amdgpu_ring_write(ring, upper_32_bits(seq));
2189}
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2201 u64 addr, u64 seq,
2202 unsigned flags)
2203{
2204 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2205 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2206
2207
2208 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2209 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2210 EOP_TC_ACTION_EN |
2211 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2212 EVENT_INDEX(5)));
2213 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2214 amdgpu_ring_write(ring, addr & 0xfffffffc);
2215 amdgpu_ring_write(ring, upper_32_bits(addr));
2216 amdgpu_ring_write(ring, lower_32_bits(seq));
2217 amdgpu_ring_write(ring, upper_32_bits(seq));
2218}
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2236 struct amdgpu_ib *ib,
2237 unsigned vmid, bool ctx_switch)
2238{
2239 u32 header, control = 0;
2240
2241
2242 if (ctx_switch) {
2243 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2244 amdgpu_ring_write(ring, 0);
2245 }
2246
2247 if (ib->flags & AMDGPU_IB_FLAG_CE)
2248 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2249 else
2250 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2251
2252 control |= ib->length_dw | (vmid << 24);
2253
2254 amdgpu_ring_write(ring, header);
2255 amdgpu_ring_write(ring,
2256#ifdef __BIG_ENDIAN
2257 (2 << 0) |
2258#endif
2259 (ib->gpu_addr & 0xFFFFFFFC));
2260 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2261 amdgpu_ring_write(ring, control);
2262}
2263
2264static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2265 struct amdgpu_ib *ib,
2266 unsigned vmid, bool ctx_switch)
2267{
2268 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
2269
2270 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
2271 amdgpu_ring_write(ring,
2272#ifdef __BIG_ENDIAN
2273 (2 << 0) |
2274#endif
2275 (ib->gpu_addr & 0xFFFFFFFC));
2276 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2277 amdgpu_ring_write(ring, control);
2278}
2279
2280static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
2281{
2282 uint32_t dw2 = 0;
2283
2284 dw2 |= 0x80000000;
2285 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2286 gfx_v7_0_ring_emit_vgt_flush(ring);
2287
2288 dw2 |= 0x8001;
2289
2290 dw2 |= 0x01000000;
2291
2292 dw2 |= 0x10002;
2293 }
2294
2295 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2296 amdgpu_ring_write(ring, dw2);
2297 amdgpu_ring_write(ring, 0);
2298}
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
2310{
2311 struct amdgpu_device *adev = ring->adev;
2312 struct amdgpu_ib ib;
2313 struct dma_fence *f = NULL;
2314 uint32_t scratch;
2315 uint32_t tmp = 0;
2316 long r;
2317
2318 r = amdgpu_gfx_scratch_get(adev, &scratch);
2319 if (r) {
2320 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
2321 return r;
2322 }
2323 WREG32(scratch, 0xCAFEDEAD);
2324 memset(&ib, 0, sizeof(ib));
2325 r = amdgpu_ib_get(adev, NULL, 256, &ib);
2326 if (r) {
2327 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
2328 goto err1;
2329 }
2330 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2331 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
2332 ib.ptr[2] = 0xDEADBEEF;
2333 ib.length_dw = 3;
2334
2335 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
2336 if (r)
2337 goto err2;
2338
2339 r = dma_fence_wait_timeout(f, false, timeout);
2340 if (r == 0) {
2341 DRM_ERROR("amdgpu: IB test timed out\n");
2342 r = -ETIMEDOUT;
2343 goto err2;
2344 } else if (r < 0) {
2345 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
2346 goto err2;
2347 }
2348 tmp = RREG32(scratch);
2349 if (tmp == 0xDEADBEEF) {
2350 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
2351 r = 0;
2352 } else {
2353 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
2354 scratch, tmp);
2355 r = -EINVAL;
2356 }
2357
2358err2:
2359 amdgpu_ib_free(adev, &ib, NULL);
2360 dma_fence_put(f);
2361err1:
2362 amdgpu_gfx_scratch_free(adev, scratch);
2363 return r;
2364}
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2398{
2399 int i;
2400
2401 if (enable) {
2402 WREG32(mmCP_ME_CNTL, 0);
2403 } else {
2404 WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
2405 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2406 adev->gfx.gfx_ring[i].ready = false;
2407 }
2408 udelay(50);
2409}
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2420{
2421 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2422 const struct gfx_firmware_header_v1_0 *ce_hdr;
2423 const struct gfx_firmware_header_v1_0 *me_hdr;
2424 const __le32 *fw_data;
2425 unsigned i, fw_size;
2426
2427 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2428 return -EINVAL;
2429
2430 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
2431 ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
2432 me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
2433
2434 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2435 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2436 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2437 adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
2438 adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
2439 adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
2440 adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
2441 adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
2442 adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
2443
2444 gfx_v7_0_cp_gfx_enable(adev, false);
2445
2446
2447 fw_data = (const __le32 *)
2448 (adev->gfx.pfp_fw->data +
2449 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2450 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2451 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2452 for (i = 0; i < fw_size; i++)
2453 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2454 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2455
2456
2457 fw_data = (const __le32 *)
2458 (adev->gfx.ce_fw->data +
2459 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2460 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2461 WREG32(mmCP_CE_UCODE_ADDR, 0);
2462 for (i = 0; i < fw_size; i++)
2463 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2464 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2465
2466
2467 fw_data = (const __le32 *)
2468 (adev->gfx.me_fw->data +
2469 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2470 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2471 WREG32(mmCP_ME_RAM_WADDR, 0);
2472 for (i = 0; i < fw_size; i++)
2473 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2474 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2475
2476 return 0;
2477}
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
2489{
2490 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2491 const struct cs_section_def *sect = NULL;
2492 const struct cs_extent_def *ext = NULL;
2493 int r, i;
2494
2495
2496 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2497 WREG32(mmCP_ENDIAN_SWAP, 0);
2498 WREG32(mmCP_DEVICE_ID, 1);
2499
2500 gfx_v7_0_cp_gfx_enable(adev, true);
2501
2502 r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
2503 if (r) {
2504 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2505 return r;
2506 }
2507
2508
2509 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2510 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2511 amdgpu_ring_write(ring, 0x8000);
2512 amdgpu_ring_write(ring, 0x8000);
2513
2514
2515 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2516 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2517
2518 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2519 amdgpu_ring_write(ring, 0x80000000);
2520 amdgpu_ring_write(ring, 0x80000000);
2521
2522 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
2523 for (ext = sect->section; ext->extent != NULL; ++ext) {
2524 if (sect->id == SECT_CONTEXT) {
2525 amdgpu_ring_write(ring,
2526 PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
2527 amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2528 for (i = 0; i < ext->reg_count; i++)
2529 amdgpu_ring_write(ring, ext->extent[i]);
2530 }
2531 }
2532 }
2533
2534 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2535 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2536 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
2537 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
2538
2539 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2540 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2541
2542 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2543 amdgpu_ring_write(ring, 0);
2544
2545 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2546 amdgpu_ring_write(ring, 0x00000316);
2547 amdgpu_ring_write(ring, 0x0000000e);
2548 amdgpu_ring_write(ring, 0x00000010);
2549
2550 amdgpu_ring_commit(ring);
2551
2552 return 0;
2553}
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2565{
2566 struct amdgpu_ring *ring;
2567 u32 tmp;
2568 u32 rb_bufsz;
2569 u64 rb_addr, rptr_addr;
2570 int r;
2571
2572 WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
2573 if (adev->asic_type != CHIP_HAWAII)
2574 WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2575
2576
2577 WREG32(mmCP_RB_WPTR_DELAY, 0);
2578
2579
2580 WREG32(mmCP_RB_VMID, 0);
2581
2582 WREG32(mmSCRATCH_ADDR, 0);
2583
2584
2585
2586 ring = &adev->gfx.gfx_ring[0];
2587 rb_bufsz = order_base_2(ring->ring_size / 8);
2588 tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2589#ifdef __BIG_ENDIAN
2590 tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
2591#endif
2592 WREG32(mmCP_RB0_CNTL, tmp);
2593
2594
2595 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2596 ring->wptr = 0;
2597 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2598
2599
2600 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2601 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2602 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2603
2604
2605 WREG32(mmSCRATCH_UMSK, 0);
2606
2607 mdelay(1);
2608 WREG32(mmCP_RB0_CNTL, tmp);
2609
2610 rb_addr = ring->gpu_addr >> 8;
2611 WREG32(mmCP_RB0_BASE, rb_addr);
2612 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2613
2614
2615 gfx_v7_0_cp_gfx_start(adev);
2616 ring->ready = true;
2617 r = amdgpu_ring_test_ring(ring);
2618 if (r) {
2619 ring->ready = false;
2620 return r;
2621 }
2622
2623 return 0;
2624}
2625
2626static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
2627{
2628 return ring->adev->wb.wb[ring->rptr_offs];
2629}
2630
2631static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
2632{
2633 struct amdgpu_device *adev = ring->adev;
2634
2635 return RREG32(mmCP_RB0_WPTR);
2636}
2637
2638static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
2639{
2640 struct amdgpu_device *adev = ring->adev;
2641
2642 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2643 (void)RREG32(mmCP_RB0_WPTR);
2644}
2645
2646static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
2647{
2648
2649 return ring->adev->wb.wb[ring->wptr_offs];
2650}
2651
2652static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
2653{
2654 struct amdgpu_device *adev = ring->adev;
2655
2656
2657 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
2658 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2659}
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2670{
2671 int i;
2672
2673 if (enable) {
2674 WREG32(mmCP_MEC_CNTL, 0);
2675 } else {
2676 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2677 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2678 adev->gfx.compute_ring[i].ready = false;
2679 }
2680 udelay(50);
2681}
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2692{
2693 const struct gfx_firmware_header_v1_0 *mec_hdr;
2694 const __le32 *fw_data;
2695 unsigned i, fw_size;
2696
2697 if (!adev->gfx.mec_fw)
2698 return -EINVAL;
2699
2700 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2701 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2702 adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
2703 adev->gfx.mec_feature_version = le32_to_cpu(
2704 mec_hdr->ucode_feature_version);
2705
2706 gfx_v7_0_cp_compute_enable(adev, false);
2707
2708
2709 fw_data = (const __le32 *)
2710 (adev->gfx.mec_fw->data +
2711 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2712 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
2713 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2714 for (i = 0; i < fw_size; i++)
2715 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
2716 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2717
2718 if (adev->asic_type == CHIP_KAVERI) {
2719 const struct gfx_firmware_header_v1_0 *mec2_hdr;
2720
2721 if (!adev->gfx.mec2_fw)
2722 return -EINVAL;
2723
2724 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
2725 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
2726 adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
2727 adev->gfx.mec2_feature_version = le32_to_cpu(
2728 mec2_hdr->ucode_feature_version);
2729
2730
2731 fw_data = (const __le32 *)
2732 (adev->gfx.mec2_fw->data +
2733 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
2734 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
2735 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2736 for (i = 0; i < fw_size; i++)
2737 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
2738 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2739 }
2740
2741 return 0;
2742}
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
2753{
2754 int i;
2755
2756 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2757 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2758
2759 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
2760 }
2761}
2762
2763static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
2764{
2765 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
2766}
2767
2768static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
2769{
2770 int r;
2771 u32 *hpd;
2772 size_t mec_hpd_size;
2773
2774 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2775
2776
2777 amdgpu_gfx_compute_queue_acquire(adev);
2778
2779
2780 mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
2781 * GFX7_MEC_HPD_SIZE * 2;
2782
2783 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2784 AMDGPU_GEM_DOMAIN_GTT,
2785 &adev->gfx.mec.hpd_eop_obj,
2786 &adev->gfx.mec.hpd_eop_gpu_addr,
2787 (void **)&hpd);
2788 if (r) {
2789 dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
2790 gfx_v7_0_mec_fini(adev);
2791 return r;
2792 }
2793
2794
2795 memset(hpd, 0, mec_hpd_size);
2796
2797 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2798 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2799
2800 return 0;
2801}
2802
2803struct hqd_registers
2804{
2805 u32 cp_mqd_base_addr;
2806 u32 cp_mqd_base_addr_hi;
2807 u32 cp_hqd_active;
2808 u32 cp_hqd_vmid;
2809 u32 cp_hqd_persistent_state;
2810 u32 cp_hqd_pipe_priority;
2811 u32 cp_hqd_queue_priority;
2812 u32 cp_hqd_quantum;
2813 u32 cp_hqd_pq_base;
2814 u32 cp_hqd_pq_base_hi;
2815 u32 cp_hqd_pq_rptr;
2816 u32 cp_hqd_pq_rptr_report_addr;
2817 u32 cp_hqd_pq_rptr_report_addr_hi;
2818 u32 cp_hqd_pq_wptr_poll_addr;
2819 u32 cp_hqd_pq_wptr_poll_addr_hi;
2820 u32 cp_hqd_pq_doorbell_control;
2821 u32 cp_hqd_pq_wptr;
2822 u32 cp_hqd_pq_control;
2823 u32 cp_hqd_ib_base_addr;
2824 u32 cp_hqd_ib_base_addr_hi;
2825 u32 cp_hqd_ib_rptr;
2826 u32 cp_hqd_ib_control;
2827 u32 cp_hqd_iq_timer;
2828 u32 cp_hqd_iq_rptr;
2829 u32 cp_hqd_dequeue_request;
2830 u32 cp_hqd_dma_offload;
2831 u32 cp_hqd_sema_cmd;
2832 u32 cp_hqd_msg_type;
2833 u32 cp_hqd_atomic0_preop_lo;
2834 u32 cp_hqd_atomic0_preop_hi;
2835 u32 cp_hqd_atomic1_preop_lo;
2836 u32 cp_hqd_atomic1_preop_hi;
2837 u32 cp_hqd_hq_scheduler0;
2838 u32 cp_hqd_hq_scheduler1;
2839 u32 cp_mqd_control;
2840};
2841
2842static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
2843 int mec, int pipe)
2844{
2845 u64 eop_gpu_addr;
2846 u32 tmp;
2847 size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
2848 * GFX7_MEC_HPD_SIZE * 2;
2849
2850 mutex_lock(&adev->srbm_mutex);
2851 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
2852
2853 cik_srbm_select(adev, mec + 1, pipe, 0, 0);
2854
2855
2856 WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2857 WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2858
2859
2860 WREG32(mmCP_HPD_EOP_VMID, 0);
2861
2862
2863 tmp = RREG32(mmCP_HPD_EOP_CONTROL);
2864 tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
2865 tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
2866 WREG32(mmCP_HPD_EOP_CONTROL, tmp);
2867
2868 cik_srbm_select(adev, 0, 0, 0, 0);
2869 mutex_unlock(&adev->srbm_mutex);
2870}
2871
2872static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
2873{
2874 int i;
2875
2876
2877 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
2878 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
2879 for (i = 0; i < adev->usec_timeout; i++) {
2880 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
2881 break;
2882 udelay(1);
2883 }
2884
2885 if (i == adev->usec_timeout)
2886 return -ETIMEDOUT;
2887
2888 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
2889 WREG32(mmCP_HQD_PQ_RPTR, 0);
2890 WREG32(mmCP_HQD_PQ_WPTR, 0);
2891 }
2892
2893 return 0;
2894}
2895
2896static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
2897 struct cik_mqd *mqd,
2898 uint64_t mqd_gpu_addr,
2899 struct amdgpu_ring *ring)
2900{
2901 u64 hqd_gpu_addr;
2902 u64 wb_gpu_addr;
2903
2904
2905 memset(mqd, 0, sizeof(struct cik_mqd));
2906
2907 mqd->header = 0xC0310800;
2908 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2909 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2910 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2911 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2912
2913
2914 mqd->cp_hqd_pq_doorbell_control =
2915 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
2916 if (ring->use_doorbell)
2917 mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
2918 else
2919 mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
2920
2921
2922 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
2923 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
2924
2925
2926 mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
2927 mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
2928
2929
2930 hqd_gpu_addr = ring->gpu_addr >> 8;
2931 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2932 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2933
2934
2935 mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
2936 mqd->cp_hqd_pq_control &=
2937 ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
2938 CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
2939
2940 mqd->cp_hqd_pq_control |=
2941 order_base_2(ring->ring_size / 8);
2942 mqd->cp_hqd_pq_control |=
2943 (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
2944#ifdef __BIG_ENDIAN
2945 mqd->cp_hqd_pq_control |=
2946 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
2947#endif
2948 mqd->cp_hqd_pq_control &=
2949 ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
2950 CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
2951 CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
2952 mqd->cp_hqd_pq_control |=
2953 CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
2954 CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK;
2955
2956
2957 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2958 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2959 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2960
2961
2962 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2963 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2964 mqd->cp_hqd_pq_rptr_report_addr_hi =
2965 upper_32_bits(wb_gpu_addr) & 0xffff;
2966
2967
2968 if (ring->use_doorbell) {
2969 mqd->cp_hqd_pq_doorbell_control =
2970 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
2971 mqd->cp_hqd_pq_doorbell_control &=
2972 ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
2973 mqd->cp_hqd_pq_doorbell_control |=
2974 (ring->doorbell_index <<
2975 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
2976 mqd->cp_hqd_pq_doorbell_control |=
2977 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
2978 mqd->cp_hqd_pq_doorbell_control &=
2979 ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
2980 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
2981
2982 } else {
2983 mqd->cp_hqd_pq_doorbell_control = 0;
2984 }
2985
2986
2987 ring->wptr = 0;
2988 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
2989 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
2990
2991
2992 mqd->cp_hqd_vmid = 0;
2993
2994
2995 mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
2996 mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
2997 mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
2998 mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
2999 mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
3000 mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
3001 mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
3002 mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
3003 mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
3004 mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
3005 mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
3006 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3007 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3008 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
3009 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
3010 mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
3011
3012
3013 mqd->cp_hqd_active = 1;
3014}
3015
3016int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
3017{
3018 uint32_t tmp;
3019 uint32_t mqd_reg;
3020 uint32_t *mqd_data;
3021
3022
3023 mqd_data = &mqd->cp_mqd_base_addr_lo;
3024
3025
3026 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3027 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3028 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3029
3030
3031 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
3032 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
3033
3034
3035 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
3036 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
3037
3038 return 0;
3039}
3040
3041static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
3042{
3043 int r;
3044 u64 mqd_gpu_addr;
3045 struct cik_mqd *mqd;
3046 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
3047
3048 r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
3049 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
3050 &mqd_gpu_addr, (void **)&mqd);
3051 if (r) {
3052 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3053 return r;
3054 }
3055
3056 mutex_lock(&adev->srbm_mutex);
3057 cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3058
3059 gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
3060 gfx_v7_0_mqd_deactivate(adev);
3061 gfx_v7_0_mqd_commit(adev, mqd);
3062
3063 cik_srbm_select(adev, 0, 0, 0, 0);
3064 mutex_unlock(&adev->srbm_mutex);
3065
3066 amdgpu_bo_kunmap(ring->mqd_obj);
3067 amdgpu_bo_unreserve(ring->mqd_obj);
3068 return 0;
3069}
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
3081{
3082 int r, i, j;
3083 u32 tmp;
3084 struct amdgpu_ring *ring;
3085
3086
3087 tmp = RREG32(mmCP_CPF_DEBUG);
3088 tmp |= (1 << 23);
3089 WREG32(mmCP_CPF_DEBUG, tmp);
3090
3091
3092 for (i = 0; i < adev->gfx.mec.num_mec; i++)
3093 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
3094 gfx_v7_0_compute_pipe_init(adev, i, j);
3095
3096
3097 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3098 r = gfx_v7_0_compute_queue_init(adev, i);
3099 if (r) {
3100 gfx_v7_0_cp_compute_fini(adev);
3101 return r;
3102 }
3103 }
3104
3105 gfx_v7_0_cp_compute_enable(adev, true);
3106
3107 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3108 ring = &adev->gfx.compute_ring[i];
3109 ring->ready = true;
3110 r = amdgpu_ring_test_ring(ring);
3111 if (r)
3112 ring->ready = false;
3113 }
3114
3115 return 0;
3116}
3117
3118static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
3119{
3120 gfx_v7_0_cp_gfx_enable(adev, enable);
3121 gfx_v7_0_cp_compute_enable(adev, enable);
3122}
3123
/**
 * gfx_v7_0_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the gfx microcode and, only if that succeeded, the compute
 * microcode.
 *
 * Returns 0 on success or the error of the first loader that failed.
 */
static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
{
	int ret = gfx_v7_0_cp_gfx_load_microcode(adev);

	if (ret)
		return ret;

	return gfx_v7_0_cp_compute_load_microcode(adev);
}
3137
3138static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3139 bool enable)
3140{
3141 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3142
3143 if (enable)
3144 tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3145 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3146 else
3147 tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3148 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3149 WREG32(mmCP_INT_CNTL_RING0, tmp);
3150}
3151
3152static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
3153{
3154 int r;
3155
3156 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3157
3158 r = gfx_v7_0_cp_load_microcode(adev);
3159 if (r)
3160 return r;
3161
3162 r = gfx_v7_0_cp_gfx_resume(adev);
3163 if (r)
3164 return r;
3165 r = gfx_v7_0_cp_compute_resume(adev);
3166 if (r)
3167 return r;
3168
3169 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3170
3171 return 0;
3172}
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3183{
3184 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3185 uint32_t seq = ring->fence_drv.sync_seq;
3186 uint64_t addr = ring->fence_drv.gpu_addr;
3187
3188 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3189 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) |
3190 WAIT_REG_MEM_FUNCTION(3) |
3191 WAIT_REG_MEM_ENGINE(usepfp)));
3192 amdgpu_ring_write(ring, addr & 0xfffffffc);
3193 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3194 amdgpu_ring_write(ring, seq);
3195 amdgpu_ring_write(ring, 0xffffffff);
3196 amdgpu_ring_write(ring, 4);
3197
3198 if (usepfp) {
3199
3200 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3201 amdgpu_ring_write(ring, 0);
3202 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3203 amdgpu_ring_write(ring, 0);
3204 }
3205}
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3222 unsigned vmid, uint64_t pd_addr)
3223{
3224 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3225
3226 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3227
3228
3229 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3230 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |
3231 WAIT_REG_MEM_FUNCTION(0) |
3232 WAIT_REG_MEM_ENGINE(0)));
3233 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3234 amdgpu_ring_write(ring, 0);
3235 amdgpu_ring_write(ring, 0);
3236 amdgpu_ring_write(ring, 0);
3237 amdgpu_ring_write(ring, 0x20);
3238
3239
3240 if (usepfp) {
3241
3242 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3243 amdgpu_ring_write(ring, 0x0);
3244
3245
3246 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3247 amdgpu_ring_write(ring, 0);
3248 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3249 amdgpu_ring_write(ring, 0);
3250 }
3251}
3252
3253static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
3254 uint32_t reg, uint32_t val)
3255{
3256 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3257
3258 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3259 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3260 WRITE_DATA_DST_SEL(0)));
3261 amdgpu_ring_write(ring, reg);
3262 amdgpu_ring_write(ring, 0);
3263 amdgpu_ring_write(ring, val);
3264}
3265
3266
3267
3268
3269
3270
3271static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
3272{
3273 amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
3274 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
3275 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
3276}
3277
3278static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
3279{
3280 const u32 *src_ptr;
3281 volatile u32 *dst_ptr;
3282 u32 dws, i;
3283 const struct cs_section_def *cs_data;
3284 int r;
3285
3286
3287 if (adev->flags & AMD_IS_APU) {
3288 if (adev->asic_type == CHIP_KAVERI) {
3289 adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
3290 adev->gfx.rlc.reg_list_size =
3291 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
3292 } else {
3293 adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
3294 adev->gfx.rlc.reg_list_size =
3295 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
3296 }
3297 }
3298 adev->gfx.rlc.cs_data = ci_cs_data;
3299 adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048);
3300 adev->gfx.rlc.cp_table_size += 64 * 1024;
3301
3302 src_ptr = adev->gfx.rlc.reg_list;
3303 dws = adev->gfx.rlc.reg_list_size;
3304 dws += (5 * 16) + 48 + 48 + 64;
3305
3306 cs_data = adev->gfx.rlc.cs_data;
3307
3308 if (src_ptr) {
3309
3310 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
3311 AMDGPU_GEM_DOMAIN_VRAM,
3312 &adev->gfx.rlc.save_restore_obj,
3313 &adev->gfx.rlc.save_restore_gpu_addr,
3314 (void **)&adev->gfx.rlc.sr_ptr);
3315 if (r) {
3316 dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r);
3317 gfx_v7_0_rlc_fini(adev);
3318 return r;
3319 }
3320
3321
3322 dst_ptr = adev->gfx.rlc.sr_ptr;
3323 for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
3324 dst_ptr[i] = cpu_to_le32(src_ptr[i]);
3325 amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
3326 amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
3327 }
3328
3329 if (cs_data) {
3330
3331 adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
3332
3333 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
3334 AMDGPU_GEM_DOMAIN_VRAM,
3335 &adev->gfx.rlc.clear_state_obj,
3336 &adev->gfx.rlc.clear_state_gpu_addr,
3337 (void **)&adev->gfx.rlc.cs_ptr);
3338 if (r) {
3339 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
3340 gfx_v7_0_rlc_fini(adev);
3341 return r;
3342 }
3343
3344
3345 dst_ptr = adev->gfx.rlc.cs_ptr;
3346 gfx_v7_0_get_csb_buffer(adev, dst_ptr);
3347 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
3348 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
3349 }
3350
3351 if (adev->gfx.rlc.cp_table_size) {
3352
3353 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
3354 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
3355 &adev->gfx.rlc.cp_table_obj,
3356 &adev->gfx.rlc.cp_table_gpu_addr,
3357 (void **)&adev->gfx.rlc.cp_table_ptr);
3358 if (r) {
3359 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
3360 gfx_v7_0_rlc_fini(adev);
3361 return r;
3362 }
3363
3364 gfx_v7_0_init_cp_pg_table(adev);
3365
3366 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
3367 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
3368
3369 }
3370
3371 return 0;
3372}
3373
3374static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3375{
3376 u32 tmp;
3377
3378 tmp = RREG32(mmRLC_LB_CNTL);
3379 if (enable)
3380 tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3381 else
3382 tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3383 WREG32(mmRLC_LB_CNTL, tmp);
3384}
3385
3386static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3387{
3388 u32 i, j, k;
3389 u32 mask;
3390
3391 mutex_lock(&adev->grbm_idx_mutex);
3392 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3393 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3394 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
3395 for (k = 0; k < adev->usec_timeout; k++) {
3396 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3397 break;
3398 udelay(1);
3399 }
3400 }
3401 }
3402 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3403 mutex_unlock(&adev->grbm_idx_mutex);
3404
3405 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3406 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3407 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3408 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3409 for (k = 0; k < adev->usec_timeout; k++) {
3410 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3411 break;
3412 udelay(1);
3413 }
3414}
3415
3416static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3417{
3418 u32 tmp;
3419
3420 tmp = RREG32(mmRLC_CNTL);
3421 if (tmp != rlc)
3422 WREG32(mmRLC_CNTL, rlc);
3423}
3424
3425static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
3426{
3427 u32 data, orig;
3428
3429 orig = data = RREG32(mmRLC_CNTL);
3430
3431 if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
3432 u32 i;
3433
3434 data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
3435 WREG32(mmRLC_CNTL, data);
3436
3437 for (i = 0; i < adev->usec_timeout; i++) {
3438 if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
3439 break;
3440 udelay(1);
3441 }
3442
3443 gfx_v7_0_wait_for_rlc_serdes(adev);
3444 }
3445
3446 return orig;
3447}
3448
3449static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
3450{
3451 u32 tmp, i, mask;
3452
3453 tmp = 0x1 | (1 << 1);
3454 WREG32(mmRLC_GPR_REG2, tmp);
3455
3456 mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
3457 RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
3458 for (i = 0; i < adev->usec_timeout; i++) {
3459 if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
3460 break;
3461 udelay(1);
3462 }
3463
3464 for (i = 0; i < adev->usec_timeout; i++) {
3465 if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
3466 break;
3467 udelay(1);
3468 }
3469}
3470
3471static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
3472{
3473 u32 tmp;
3474
3475 tmp = 0x1 | (0 << 1);
3476 WREG32(mmRLC_GPR_REG2, tmp);
3477}
3478
3479
3480
3481
3482
3483
3484
3485
3486static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
3487{
3488 WREG32(mmRLC_CNTL, 0);
3489
3490 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3491
3492 gfx_v7_0_wait_for_rlc_serdes(adev);
3493}
3494
3495
3496
3497
3498
3499
3500
3501
3502static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
3503{
3504 WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
3505
3506 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3507
3508 udelay(50);
3509}
3510
3511static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
3512{
3513 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3514
3515 tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3516 WREG32(mmGRBM_SOFT_RESET, tmp);
3517 udelay(50);
3518 tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3519 WREG32(mmGRBM_SOFT_RESET, tmp);
3520 udelay(50);
3521}
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
3533{
3534 const struct rlc_firmware_header_v1_0 *hdr;
3535 const __le32 *fw_data;
3536 unsigned i, fw_size;
3537 u32 tmp;
3538
3539 if (!adev->gfx.rlc_fw)
3540 return -EINVAL;
3541
3542 hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
3543 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3544 adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
3545 adev->gfx.rlc_feature_version = le32_to_cpu(
3546 hdr->ucode_feature_version);
3547
3548 gfx_v7_0_rlc_stop(adev);
3549
3550
3551 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3552 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
3553
3554 gfx_v7_0_rlc_reset(adev);
3555
3556 gfx_v7_0_init_pg(adev);
3557
3558 WREG32(mmRLC_LB_CNTR_INIT, 0);
3559 WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
3560
3561 mutex_lock(&adev->grbm_idx_mutex);
3562 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3563 WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
3564 WREG32(mmRLC_LB_PARAMS, 0x00600408);
3565 WREG32(mmRLC_LB_CNTL, 0x80000004);
3566 mutex_unlock(&adev->grbm_idx_mutex);
3567
3568 WREG32(mmRLC_MC_CNTL, 0);
3569 WREG32(mmRLC_UCODE_CNTL, 0);
3570
3571 fw_data = (const __le32 *)
3572 (adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3573 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3574 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3575 for (i = 0; i < fw_size; i++)
3576 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3577 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3578
3579
3580 gfx_v7_0_enable_lbpw(adev, false);
3581
3582 if (adev->asic_type == CHIP_BONAIRE)
3583 WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
3584
3585 gfx_v7_0_rlc_start(adev);
3586
3587 return 0;
3588}
3589
3590static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
3591{
3592 u32 data, orig, tmp, tmp2;
3593
3594 orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
3595
3596 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
3597 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3598
3599 tmp = gfx_v7_0_halt_rlc(adev);
3600
3601 mutex_lock(&adev->grbm_idx_mutex);
3602 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3603 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3604 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3605 tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
3606 RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
3607 RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
3608 WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
3609 mutex_unlock(&adev->grbm_idx_mutex);
3610
3611 gfx_v7_0_update_rlc(adev, tmp);
3612
3613 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3614 if (orig != data)
3615 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
3616
3617 } else {
3618 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3619
3620 RREG32(mmCB_CGTT_SCLK_CTRL);
3621 RREG32(mmCB_CGTT_SCLK_CTRL);
3622 RREG32(mmCB_CGTT_SCLK_CTRL);
3623 RREG32(mmCB_CGTT_SCLK_CTRL);
3624
3625 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
3626 if (orig != data)
3627 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
3628
3629 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3630 }
3631}
3632
3633static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
3634{
3635 u32 data, orig, tmp = 0;
3636
3637 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3638 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3639 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
3640 orig = data = RREG32(mmCP_MEM_SLP_CNTL);
3641 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3642 if (orig != data)
3643 WREG32(mmCP_MEM_SLP_CNTL, data);
3644 }
3645 }
3646
3647 orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
3648 data |= 0x00000001;
3649 data &= 0xfffffffd;
3650 if (orig != data)
3651 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
3652
3653 tmp = gfx_v7_0_halt_rlc(adev);
3654
3655 mutex_lock(&adev->grbm_idx_mutex);
3656 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3657 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3658 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3659 data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
3660 RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
3661 WREG32(mmRLC_SERDES_WR_CTRL, data);
3662 mutex_unlock(&adev->grbm_idx_mutex);
3663
3664 gfx_v7_0_update_rlc(adev, tmp);
3665
3666 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
3667 orig = data = RREG32(mmCGTS_SM_CTRL_REG);
3668 data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
3669 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
3670 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
3671 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
3672 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
3673 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
3674 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
3675 data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
3676 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
3677 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
3678 if (orig != data)
3679 WREG32(mmCGTS_SM_CTRL_REG, data);
3680 }
3681 } else {
3682 orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
3683 data |= 0x00000003;
3684 if (orig != data)
3685 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
3686
3687 data = RREG32(mmRLC_MEM_SLP_CNTL);
3688 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
3689 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3690 WREG32(mmRLC_MEM_SLP_CNTL, data);
3691 }
3692
3693 data = RREG32(mmCP_MEM_SLP_CNTL);
3694 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
3695 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3696 WREG32(mmCP_MEM_SLP_CNTL, data);
3697 }
3698
3699 orig = data = RREG32(mmCGTS_SM_CTRL_REG);
3700 data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
3701 if (orig != data)
3702 WREG32(mmCGTS_SM_CTRL_REG, data);
3703
3704 tmp = gfx_v7_0_halt_rlc(adev);
3705
3706 mutex_lock(&adev->grbm_idx_mutex);
3707 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3708 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3709 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3710 data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
3711 WREG32(mmRLC_SERDES_WR_CTRL, data);
3712 mutex_unlock(&adev->grbm_idx_mutex);
3713
3714 gfx_v7_0_update_rlc(adev, tmp);
3715 }
3716}
3717
3718static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
3719 bool enable)
3720{
3721 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3722
3723 if (enable) {
3724 gfx_v7_0_enable_mgcg(adev, true);
3725 gfx_v7_0_enable_cgcg(adev, true);
3726 } else {
3727 gfx_v7_0_enable_cgcg(adev, false);
3728 gfx_v7_0_enable_mgcg(adev, false);
3729 }
3730 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3731}
3732
3733static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
3734 bool enable)
3735{
3736 u32 data, orig;
3737
3738 orig = data = RREG32(mmRLC_PG_CNTL);
3739 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3740 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3741 else
3742 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3743 if (orig != data)
3744 WREG32(mmRLC_PG_CNTL, data);
3745}
3746
3747static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
3748 bool enable)
3749{
3750 u32 data, orig;
3751
3752 orig = data = RREG32(mmRLC_PG_CNTL);
3753 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3754 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3755 else
3756 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3757 if (orig != data)
3758 WREG32(mmRLC_PG_CNTL, data);
3759}
3760
3761static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
3762{
3763 u32 data, orig;
3764
3765 orig = data = RREG32(mmRLC_PG_CNTL);
3766 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
3767 data &= ~0x8000;
3768 else
3769 data |= 0x8000;
3770 if (orig != data)
3771 WREG32(mmRLC_PG_CNTL, data);
3772}
3773
3774static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
3775{
3776 u32 data, orig;
3777
3778 orig = data = RREG32(mmRLC_PG_CNTL);
3779 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
3780 data &= ~0x2000;
3781 else
3782 data |= 0x2000;
3783 if (orig != data)
3784 WREG32(mmRLC_PG_CNTL, data);
3785}
3786
3787static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev)
3788{
3789 const __le32 *fw_data;
3790 volatile u32 *dst_ptr;
3791 int me, i, max_me = 4;
3792 u32 bo_offset = 0;
3793 u32 table_offset, table_size;
3794
3795 if (adev->asic_type == CHIP_KAVERI)
3796 max_me = 5;
3797
3798 if (adev->gfx.rlc.cp_table_ptr == NULL)
3799 return;
3800
3801
3802 dst_ptr = adev->gfx.rlc.cp_table_ptr;
3803 for (me = 0; me < max_me; me++) {
3804 if (me == 0) {
3805 const struct gfx_firmware_header_v1_0 *hdr =
3806 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
3807 fw_data = (const __le32 *)
3808 (adev->gfx.ce_fw->data +
3809 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3810 table_offset = le32_to_cpu(hdr->jt_offset);
3811 table_size = le32_to_cpu(hdr->jt_size);
3812 } else if (me == 1) {
3813 const struct gfx_firmware_header_v1_0 *hdr =
3814 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
3815 fw_data = (const __le32 *)
3816 (adev->gfx.pfp_fw->data +
3817 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3818 table_offset = le32_to_cpu(hdr->jt_offset);
3819 table_size = le32_to_cpu(hdr->jt_size);
3820 } else if (me == 2) {
3821 const struct gfx_firmware_header_v1_0 *hdr =
3822 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
3823 fw_data = (const __le32 *)
3824 (adev->gfx.me_fw->data +
3825 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3826 table_offset = le32_to_cpu(hdr->jt_offset);
3827 table_size = le32_to_cpu(hdr->jt_size);
3828 } else if (me == 3) {
3829 const struct gfx_firmware_header_v1_0 *hdr =
3830 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3831 fw_data = (const __le32 *)
3832 (adev->gfx.mec_fw->data +
3833 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3834 table_offset = le32_to_cpu(hdr->jt_offset);
3835 table_size = le32_to_cpu(hdr->jt_size);
3836 } else {
3837 const struct gfx_firmware_header_v1_0 *hdr =
3838 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3839 fw_data = (const __le32 *)
3840 (adev->gfx.mec2_fw->data +
3841 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3842 table_offset = le32_to_cpu(hdr->jt_offset);
3843 table_size = le32_to_cpu(hdr->jt_size);
3844 }
3845
3846 for (i = 0; i < table_size; i ++) {
3847 dst_ptr[bo_offset + i] =
3848 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
3849 }
3850
3851 bo_offset += table_size;
3852 }
3853}
3854
3855static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
3856 bool enable)
3857{
3858 u32 data, orig;
3859
3860 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
3861 orig = data = RREG32(mmRLC_PG_CNTL);
3862 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3863 if (orig != data)
3864 WREG32(mmRLC_PG_CNTL, data);
3865
3866 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3867 data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3868 if (orig != data)
3869 WREG32(mmRLC_AUTO_PG_CTRL, data);
3870 } else {
3871 orig = data = RREG32(mmRLC_PG_CNTL);
3872 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3873 if (orig != data)
3874 WREG32(mmRLC_PG_CNTL, data);
3875
3876 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3877 data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3878 if (orig != data)
3879 WREG32(mmRLC_AUTO_PG_CTRL, data);
3880
3881 data = RREG32(mmDB_RENDER_CONTROL);
3882 }
3883}
3884
3885static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
3886 u32 bitmap)
3887{
3888 u32 data;
3889
3890 if (!bitmap)
3891 return;
3892
3893 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3894 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3895
3896 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
3897}
3898
3899static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3900{
3901 u32 data, mask;
3902
3903 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
3904 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
3905
3906 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3907 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3908
3909 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
3910
3911 return (~data) & mask;
3912}
3913
3914static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
3915{
3916 u32 tmp;
3917
3918 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3919
3920 tmp = RREG32(mmRLC_MAX_PG_CU);
3921 tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
3922 tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
3923 WREG32(mmRLC_MAX_PG_CU, tmp);
3924}
3925
3926static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
3927 bool enable)
3928{
3929 u32 data, orig;
3930
3931 orig = data = RREG32(mmRLC_PG_CNTL);
3932 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
3933 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3934 else
3935 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3936 if (orig != data)
3937 WREG32(mmRLC_PG_CNTL, data);
3938}
3939
3940static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
3941 bool enable)
3942{
3943 u32 data, orig;
3944
3945 orig = data = RREG32(mmRLC_PG_CNTL);
3946 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
3947 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3948 else
3949 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3950 if (orig != data)
3951 WREG32(mmRLC_PG_CNTL, data);
3952}
3953
3954#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
3955#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
3956
3957static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
3958{
3959 u32 data, orig;
3960 u32 i;
3961
3962 if (adev->gfx.rlc.cs_data) {
3963 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
3964 WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
3965 WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
3966 WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
3967 } else {
3968 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
3969 for (i = 0; i < 3; i++)
3970 WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
3971 }
3972 if (adev->gfx.rlc.reg_list) {
3973 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
3974 for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
3975 WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
3976 }
3977
3978 orig = data = RREG32(mmRLC_PG_CNTL);
3979 data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
3980 if (orig != data)
3981 WREG32(mmRLC_PG_CNTL, data);
3982
3983 WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
3984 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3985
3986 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3987 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3988 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3989 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3990
3991 data = 0x10101010;
3992 WREG32(mmRLC_PG_DELAY, data);
3993
3994 data = RREG32(mmRLC_PG_DELAY_2);
3995 data &= ~0xff;
3996 data |= 0x3;
3997 WREG32(mmRLC_PG_DELAY_2, data);
3998
3999 data = RREG32(mmRLC_AUTO_PG_CTRL);
4000 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
4001 data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
4002 WREG32(mmRLC_AUTO_PG_CTRL, data);
4003
4004}
4005
4006static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
4007{
4008 gfx_v7_0_enable_gfx_cgpg(adev, enable);
4009 gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
4010 gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
4011}
4012
4013static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
4014{
4015 u32 count = 0;
4016 const struct cs_section_def *sect = NULL;
4017 const struct cs_extent_def *ext = NULL;
4018
4019 if (adev->gfx.rlc.cs_data == NULL)
4020 return 0;
4021
4022
4023 count += 2;
4024
4025 count += 3;
4026
4027 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4028 for (ext = sect->section; ext->extent != NULL; ++ext) {
4029 if (sect->id == SECT_CONTEXT)
4030 count += 2 + ext->reg_count;
4031 else
4032 return 0;
4033 }
4034 }
4035
4036 count += 4;
4037
4038 count += 2;
4039
4040 count += 2;
4041
4042 return count;
4043}
4044
4045static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
4046 volatile u32 *buffer)
4047{
4048 u32 count = 0, i;
4049 const struct cs_section_def *sect = NULL;
4050 const struct cs_extent_def *ext = NULL;
4051
4052 if (adev->gfx.rlc.cs_data == NULL)
4053 return;
4054 if (buffer == NULL)
4055 return;
4056
4057 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4058 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4059
4060 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4061 buffer[count++] = cpu_to_le32(0x80000000);
4062 buffer[count++] = cpu_to_le32(0x80000000);
4063
4064 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4065 for (ext = sect->section; ext->extent != NULL; ++ext) {
4066 if (sect->id == SECT_CONTEXT) {
4067 buffer[count++] =
4068 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
4069 buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4070 for (i = 0; i < ext->reg_count; i++)
4071 buffer[count++] = cpu_to_le32(ext->extent[i]);
4072 } else {
4073 return;
4074 }
4075 }
4076 }
4077
4078 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4079 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4080 switch (adev->asic_type) {
4081 case CHIP_BONAIRE:
4082 buffer[count++] = cpu_to_le32(0x16000012);
4083 buffer[count++] = cpu_to_le32(0x00000000);
4084 break;
4085 case CHIP_KAVERI:
4086 buffer[count++] = cpu_to_le32(0x00000000);
4087 buffer[count++] = cpu_to_le32(0x00000000);
4088 break;
4089 case CHIP_KABINI:
4090 case CHIP_MULLINS:
4091 buffer[count++] = cpu_to_le32(0x00000000);
4092 buffer[count++] = cpu_to_le32(0x00000000);
4093 break;
4094 case CHIP_HAWAII:
4095 buffer[count++] = cpu_to_le32(0x3a00161a);
4096 buffer[count++] = cpu_to_le32(0x0000002e);
4097 break;
4098 default:
4099 buffer[count++] = cpu_to_le32(0x00000000);
4100 buffer[count++] = cpu_to_le32(0x00000000);
4101 break;
4102 }
4103
4104 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4105 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
4106
4107 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
4108 buffer[count++] = cpu_to_le32(0);
4109}
4110
4111static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
4112{
4113 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4114 AMD_PG_SUPPORT_GFX_SMG |
4115 AMD_PG_SUPPORT_GFX_DMG |
4116 AMD_PG_SUPPORT_CP |
4117 AMD_PG_SUPPORT_GDS |
4118 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4119 gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
4120 gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
4121 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4122 gfx_v7_0_init_gfx_cgpg(adev);
4123 gfx_v7_0_enable_cp_pg(adev, true);
4124 gfx_v7_0_enable_gds_pg(adev, true);
4125 }
4126 gfx_v7_0_init_ao_cu_mask(adev);
4127 gfx_v7_0_update_gfx_pg(adev, true);
4128 }
4129}
4130
4131static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
4132{
4133 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4134 AMD_PG_SUPPORT_GFX_SMG |
4135 AMD_PG_SUPPORT_GFX_DMG |
4136 AMD_PG_SUPPORT_CP |
4137 AMD_PG_SUPPORT_GDS |
4138 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4139 gfx_v7_0_update_gfx_pg(adev, false);
4140 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4141 gfx_v7_0_enable_cp_pg(adev, false);
4142 gfx_v7_0_enable_gds_pg(adev, false);
4143 }
4144 }
4145}
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4156{
4157 uint64_t clock;
4158
4159 mutex_lock(&adev->gfx.gpu_clock_mutex);
4160 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4161 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4162 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4163 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4164 return clock;
4165}
4166
4167static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4168 uint32_t vmid,
4169 uint32_t gds_base, uint32_t gds_size,
4170 uint32_t gws_base, uint32_t gws_size,
4171 uint32_t oa_base, uint32_t oa_size)
4172{
4173
4174 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4175 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4176 WRITE_DATA_DST_SEL(0)));
4177 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4178 amdgpu_ring_write(ring, 0);
4179 amdgpu_ring_write(ring, gds_base);
4180
4181
4182 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4183 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4184 WRITE_DATA_DST_SEL(0)));
4185 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4186 amdgpu_ring_write(ring, 0);
4187 amdgpu_ring_write(ring, gds_size);
4188
4189
4190 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4191 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4192 WRITE_DATA_DST_SEL(0)));
4193 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4194 amdgpu_ring_write(ring, 0);
4195 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4196
4197
4198 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4199 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4200 WRITE_DATA_DST_SEL(0)));
4201 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4202 amdgpu_ring_write(ring, 0);
4203 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4204}
4205
4206static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4207{
4208 struct amdgpu_device *adev = ring->adev;
4209 uint32_t value = 0;
4210
4211 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4212 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4213 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4214 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4215 WREG32(mmSQ_CMD, value);
4216}
4217
4218static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
4219{
4220 WREG32(mmSQ_IND_INDEX,
4221 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4222 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4223 (address << SQ_IND_INDEX__INDEX__SHIFT) |
4224 (SQ_IND_INDEX__FORCE_READ_MASK));
4225 return RREG32(mmSQ_IND_DATA);
4226}
4227
4228static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
4229 uint32_t wave, uint32_t thread,
4230 uint32_t regno, uint32_t num, uint32_t *out)
4231{
4232 WREG32(mmSQ_IND_INDEX,
4233 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4234 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4235 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
4236 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
4237 (SQ_IND_INDEX__FORCE_READ_MASK) |
4238 (SQ_IND_INDEX__AUTO_INCR_MASK));
4239 while (num--)
4240 *(out++) = RREG32(mmSQ_IND_DATA);
4241}
4242
4243static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
4244{
4245
4246 dst[(*no_fields)++] = 0;
4247 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
4248 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
4249 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
4250 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
4251 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
4252 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
4253 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
4254 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
4255 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
4256 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
4257 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
4258 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
4259 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
4260 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
4261 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
4262 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
4263 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
4264 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
4265}
4266
4267static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
4268 uint32_t wave, uint32_t start,
4269 uint32_t size, uint32_t *dst)
4270{
4271 wave_read_regs(
4272 adev, simd, wave, 0,
4273 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
4274}
4275
/**
 * gfx_v7_0_select_me_pipe_q - select an ME/pipe/queue via SRBM
 * @adev: amdgpu device pointer
 * @me: micro engine id
 * @pipe: pipe id
 * @q: queue id
 *
 * Forwards to cik_srbm_select() with 0 as its last argument.
 * NOTE(review): the trailing 0 is presumably the VMID - confirm against
 * cik_srbm_select().
 */
static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	cik_srbm_select(adev, me, pipe, q, 0);
}
4281
4282static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
4283 .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
4284 .select_se_sh = &gfx_v7_0_select_se_sh,
4285 .read_wave_data = &gfx_v7_0_read_wave_data,
4286 .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
4287 .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
4288};
4289
/* RLC safe-mode callbacks; installed into adev->gfx.rlc.funcs by early_init */
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
	.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
};
4294
4295static int gfx_v7_0_early_init(void *handle)
4296{
4297 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4298
4299 adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
4300 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4301 adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
4302 adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
4303 gfx_v7_0_set_ring_funcs(adev);
4304 gfx_v7_0_set_irq_funcs(adev);
4305 gfx_v7_0_set_gds_init(adev);
4306
4307 return 0;
4308}
4309
4310static int gfx_v7_0_late_init(void *handle)
4311{
4312 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4313 int r;
4314
4315 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4316 if (r)
4317 return r;
4318
4319 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4320 if (r)
4321 return r;
4322
4323 return 0;
4324}
4325
4326static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4327{
4328 u32 gb_addr_config;
4329 u32 mc_shared_chmap, mc_arb_ramcfg;
4330 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
4331 u32 tmp;
4332
4333 switch (adev->asic_type) {
4334 case CHIP_BONAIRE:
4335 adev->gfx.config.max_shader_engines = 2;
4336 adev->gfx.config.max_tile_pipes = 4;
4337 adev->gfx.config.max_cu_per_sh = 7;
4338 adev->gfx.config.max_sh_per_se = 1;
4339 adev->gfx.config.max_backends_per_se = 2;
4340 adev->gfx.config.max_texture_channel_caches = 4;
4341 adev->gfx.config.max_gprs = 256;
4342 adev->gfx.config.max_gs_threads = 32;
4343 adev->gfx.config.max_hw_contexts = 8;
4344
4345 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4346 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4347 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4348 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4349 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4350 break;
4351 case CHIP_HAWAII:
4352 adev->gfx.config.max_shader_engines = 4;
4353 adev->gfx.config.max_tile_pipes = 16;
4354 adev->gfx.config.max_cu_per_sh = 11;
4355 adev->gfx.config.max_sh_per_se = 1;
4356 adev->gfx.config.max_backends_per_se = 4;
4357 adev->gfx.config.max_texture_channel_caches = 16;
4358 adev->gfx.config.max_gprs = 256;
4359 adev->gfx.config.max_gs_threads = 32;
4360 adev->gfx.config.max_hw_contexts = 8;
4361
4362 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4363 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4364 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4365 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4366 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
4367 break;
4368 case CHIP_KAVERI:
4369 adev->gfx.config.max_shader_engines = 1;
4370 adev->gfx.config.max_tile_pipes = 4;
4371 adev->gfx.config.max_cu_per_sh = 8;
4372 adev->gfx.config.max_backends_per_se = 2;
4373 adev->gfx.config.max_sh_per_se = 1;
4374 adev->gfx.config.max_texture_channel_caches = 4;
4375 adev->gfx.config.max_gprs = 256;
4376 adev->gfx.config.max_gs_threads = 16;
4377 adev->gfx.config.max_hw_contexts = 8;
4378
4379 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4380 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4381 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4382 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4383 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4384 break;
4385 case CHIP_KABINI:
4386 case CHIP_MULLINS:
4387 default:
4388 adev->gfx.config.max_shader_engines = 1;
4389 adev->gfx.config.max_tile_pipes = 2;
4390 adev->gfx.config.max_cu_per_sh = 2;
4391 adev->gfx.config.max_sh_per_se = 1;
4392 adev->gfx.config.max_backends_per_se = 1;
4393 adev->gfx.config.max_texture_channel_caches = 2;
4394 adev->gfx.config.max_gprs = 256;
4395 adev->gfx.config.max_gs_threads = 16;
4396 adev->gfx.config.max_hw_contexts = 8;
4397
4398 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4399 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4400 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4401 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4402 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4403 break;
4404 }
4405
4406 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
4407 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
4408 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
4409
4410 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
4411 adev->gfx.config.mem_max_burst_length_bytes = 256;
4412 if (adev->flags & AMD_IS_APU) {
4413
4414 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
4415 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4416 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4417
4418 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
4419 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4420 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4421
4422
4423 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
4424 dimm00_addr_map = 0;
4425 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
4426 dimm01_addr_map = 0;
4427 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
4428 dimm10_addr_map = 0;
4429 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
4430 dimm11_addr_map = 0;
4431
4432
4433
4434 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
4435 adev->gfx.config.mem_row_size_in_kb = 2;
4436 else
4437 adev->gfx.config.mem_row_size_in_kb = 1;
4438 } else {
4439 tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
4440 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
4441 if (adev->gfx.config.mem_row_size_in_kb > 4)
4442 adev->gfx.config.mem_row_size_in_kb = 4;
4443 }
4444
4445 adev->gfx.config.shader_engine_tile_size = 32;
4446 adev->gfx.config.num_gpus = 1;
4447 adev->gfx.config.multi_gpu_tile_size = 64;
4448
4449
4450 gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
4451 switch (adev->gfx.config.mem_row_size_in_kb) {
4452 case 1:
4453 default:
4454 gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4455 break;
4456 case 2:
4457 gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4458 break;
4459 case 4:
4460 gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4461 break;
4462 }
4463 adev->gfx.config.gb_addr_config = gb_addr_config;
4464}
4465
4466static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
4467 int mec, int pipe, int queue)
4468{
4469 int r;
4470 unsigned irq_type;
4471 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
4472
4473
4474 ring->me = mec + 1;
4475 ring->pipe = pipe;
4476 ring->queue = queue;
4477
4478 ring->ring_obj = NULL;
4479 ring->use_doorbell = true;
4480 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
4481 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
4482
4483 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4484 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
4485 + ring->pipe;
4486
4487
4488 r = amdgpu_ring_init(adev, ring, 1024,
4489 &adev->gfx.eop_irq, irq_type);
4490 if (r)
4491 return r;
4492
4493
4494 return 0;
4495}
4496
4497static int gfx_v7_0_sw_init(void *handle)
4498{
4499 struct amdgpu_ring *ring;
4500 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4501 int i, j, k, r, ring_id;
4502
4503 switch (adev->asic_type) {
4504 case CHIP_KAVERI:
4505 adev->gfx.mec.num_mec = 2;
4506 break;
4507 case CHIP_BONAIRE:
4508 case CHIP_HAWAII:
4509 case CHIP_KABINI:
4510 case CHIP_MULLINS:
4511 default:
4512 adev->gfx.mec.num_mec = 1;
4513 break;
4514 }
4515 adev->gfx.mec.num_pipe_per_mec = 4;
4516 adev->gfx.mec.num_queue_per_pipe = 8;
4517
4518
4519 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
4520 if (r)
4521 return r;
4522
4523
4524 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
4525 &adev->gfx.priv_reg_irq);
4526 if (r)
4527 return r;
4528
4529
4530 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
4531 &adev->gfx.priv_inst_irq);
4532 if (r)
4533 return r;
4534
4535 gfx_v7_0_scratch_init(adev);
4536
4537 r = gfx_v7_0_init_microcode(adev);
4538 if (r) {
4539 DRM_ERROR("Failed to load gfx firmware!\n");
4540 return r;
4541 }
4542
4543 r = gfx_v7_0_rlc_init(adev);
4544 if (r) {
4545 DRM_ERROR("Failed to init rlc BOs!\n");
4546 return r;
4547 }
4548
4549
4550 r = gfx_v7_0_mec_init(adev);
4551 if (r) {
4552 DRM_ERROR("Failed to init MEC BOs!\n");
4553 return r;
4554 }
4555
4556 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4557 ring = &adev->gfx.gfx_ring[i];
4558 ring->ring_obj = NULL;
4559 sprintf(ring->name, "gfx");
4560 r = amdgpu_ring_init(adev, ring, 1024,
4561 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
4562 if (r)
4563 return r;
4564 }
4565
4566
4567 ring_id = 0;
4568 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4569 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4570 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4571 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
4572 continue;
4573
4574 r = gfx_v7_0_compute_ring_init(adev,
4575 ring_id,
4576 i, k, j);
4577 if (r)
4578 return r;
4579
4580 ring_id++;
4581 }
4582 }
4583 }
4584
4585 adev->gfx.ce_ram_size = 0x8000;
4586
4587 gfx_v7_0_gpu_early_init(adev);
4588
4589 return r;
4590}
4591
4592static int gfx_v7_0_sw_fini(void *handle)
4593{
4594 int i;
4595 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4596
4597 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
4598 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
4599 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
4600
4601 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4602 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
4603 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4604 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
4605
4606 gfx_v7_0_cp_compute_fini(adev);
4607 gfx_v7_0_rlc_fini(adev);
4608 gfx_v7_0_mec_fini(adev);
4609 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
4610 &adev->gfx.rlc.clear_state_gpu_addr,
4611 (void **)&adev->gfx.rlc.cs_ptr);
4612 if (adev->gfx.rlc.cp_table_size) {
4613 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
4614 &adev->gfx.rlc.cp_table_gpu_addr,
4615 (void **)&adev->gfx.rlc.cp_table_ptr);
4616 }
4617 gfx_v7_0_free_microcode(adev);
4618
4619 return 0;
4620}
4621
/**
 * gfx_v7_0_hw_init - IP block hw_init callback
 * @handle: amdgpu device pointer passed as void *
 *
 * Runs gfx_v7_0_constants_init(), then resumes the RLC and afterwards the CP.
 *
 * Returns 0 on success, or the error from the RLC or CP resume.
 */
static int gfx_v7_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v7_0_constants_init(adev);

	/* the RLC is brought up before the CP */
	r = gfx_v7_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v7_0_cp_resume(adev);
}
4640
4641static int gfx_v7_0_hw_fini(void *handle)
4642{
4643 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4644
4645 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4646 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4647 gfx_v7_0_cp_enable(adev, false);
4648 gfx_v7_0_rlc_stop(adev);
4649 gfx_v7_0_fini_pg(adev);
4650
4651 return 0;
4652}
4653
/**
 * gfx_v7_0_suspend - IP block suspend callback
 * @handle: amdgpu device pointer passed as void *
 *
 * Suspend is implemented as a plain hw_fini.
 */
static int gfx_v7_0_suspend(void *handle)
{
	return gfx_v7_0_hw_fini(handle);
}
4660
/**
 * gfx_v7_0_resume - IP block resume callback
 * @handle: amdgpu device pointer passed as void *
 *
 * Resume is implemented as a plain hw_init.
 */
static int gfx_v7_0_resume(void *handle)
{
	return gfx_v7_0_hw_init(handle);
}
4667
4668static bool gfx_v7_0_is_idle(void *handle)
4669{
4670 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4671
4672 if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4673 return false;
4674 else
4675 return true;
4676}
4677
4678static int gfx_v7_0_wait_for_idle(void *handle)
4679{
4680 unsigned i;
4681 u32 tmp;
4682 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4683
4684 for (i = 0; i < adev->usec_timeout; i++) {
4685
4686 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4687
4688 if (!tmp)
4689 return 0;
4690 udelay(1);
4691 }
4692 return -ETIMEDOUT;
4693}
4694
/**
 * gfx_v7_0_soft_reset - IP block soft_reset callback
 * @handle: amdgpu device pointer passed as void *
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS to decide which GRBM and
 * SRBM soft-reset bits are needed.  If any are, power/clock gating is
 * disabled, the RLC is stopped, the CP and MEC engines are halted, and each
 * requested reset is pulsed: set the bits, read back, wait 50us, clear the
 * bits, read back.
 *
 * Always returns 0.
 */
static int gfx_v7_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy gfx pipeline block requests a CP + GFX reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
			GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;

	/* a busy CP additionally requests an SRBM-level GRBM reset */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
	}

	/* GRBM_STATUS2: busy RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;

	/* SRBM_STATUS: pending GRBM request */
	tmp = RREG32(mmSRBM_STATUS);
	if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* disable PG and CG before resetting */
		gfx_v7_0_fini_pg(adev);
		gfx_v7_0_update_cg(adev, false);

		/* stop the RLC */
		gfx_v7_0_rlc_stop(adev);

		/* halt the ME, PFP and CE engines of the gfx CP */
		WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);

		/* halt both MEC micro engines */
		WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);

		if (grbm_soft_reset) {
			/* assert the GRBM reset bits; the read back follows the write */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			/* de-assert the same bits again */
			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert / wait / de-assert pulse on the SRBM side */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}
		/* final settle delay after the reset pulses */
		udelay(50);
	}
	return 0;
}
4773
4774static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4775 enum amdgpu_interrupt_state state)
4776{
4777 u32 cp_int_cntl;
4778
4779 switch (state) {
4780 case AMDGPU_IRQ_STATE_DISABLE:
4781 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4782 cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4783 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4784 break;
4785 case AMDGPU_IRQ_STATE_ENABLE:
4786 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4787 cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4788 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4789 break;
4790 default:
4791 break;
4792 }
4793}
4794
4795static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4796 int me, int pipe,
4797 enum amdgpu_interrupt_state state)
4798{
4799 u32 mec_int_cntl, mec_int_cntl_reg;
4800
4801
4802
4803
4804
4805
4806
4807 if (me == 1) {
4808 switch (pipe) {
4809 case 0:
4810 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4811 break;
4812 case 1:
4813 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
4814 break;
4815 case 2:
4816 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
4817 break;
4818 case 3:
4819 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
4820 break;
4821 default:
4822 DRM_DEBUG("invalid pipe %d\n", pipe);
4823 return;
4824 }
4825 } else {
4826 DRM_DEBUG("invalid me %d\n", me);
4827 return;
4828 }
4829
4830 switch (state) {
4831 case AMDGPU_IRQ_STATE_DISABLE:
4832 mec_int_cntl = RREG32(mec_int_cntl_reg);
4833 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4834 WREG32(mec_int_cntl_reg, mec_int_cntl);
4835 break;
4836 case AMDGPU_IRQ_STATE_ENABLE:
4837 mec_int_cntl = RREG32(mec_int_cntl_reg);
4838 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4839 WREG32(mec_int_cntl_reg, mec_int_cntl);
4840 break;
4841 default:
4842 break;
4843 }
4844}
4845
4846static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4847 struct amdgpu_irq_src *src,
4848 unsigned type,
4849 enum amdgpu_interrupt_state state)
4850{
4851 u32 cp_int_cntl;
4852
4853 switch (state) {
4854 case AMDGPU_IRQ_STATE_DISABLE:
4855 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4856 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4857 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4858 break;
4859 case AMDGPU_IRQ_STATE_ENABLE:
4860 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4861 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4862 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4863 break;
4864 default:
4865 break;
4866 }
4867
4868 return 0;
4869}
4870
4871static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4872 struct amdgpu_irq_src *src,
4873 unsigned type,
4874 enum amdgpu_interrupt_state state)
4875{
4876 u32 cp_int_cntl;
4877
4878 switch (state) {
4879 case AMDGPU_IRQ_STATE_DISABLE:
4880 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4881 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4882 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4883 break;
4884 case AMDGPU_IRQ_STATE_ENABLE:
4885 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4886 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4887 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4888 break;
4889 default:
4890 break;
4891 }
4892
4893 return 0;
4894}
4895
4896static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4897 struct amdgpu_irq_src *src,
4898 unsigned type,
4899 enum amdgpu_interrupt_state state)
4900{
4901 switch (type) {
4902 case AMDGPU_CP_IRQ_GFX_EOP:
4903 gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
4904 break;
4905 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4906 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4907 break;
4908 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4909 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4910 break;
4911 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4912 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4913 break;
4914 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4915 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4916 break;
4917 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4918 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4919 break;
4920 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4921 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4922 break;
4923 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4924 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4925 break;
4926 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4927 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4928 break;
4929 default:
4930 break;
4931 }
4932 return 0;
4933}
4934
4935static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
4936 struct amdgpu_irq_src *source,
4937 struct amdgpu_iv_entry *entry)
4938{
4939 u8 me_id, pipe_id;
4940 struct amdgpu_ring *ring;
4941 int i;
4942
4943 DRM_DEBUG("IH: CP EOP\n");
4944 me_id = (entry->ring_id & 0x0c) >> 2;
4945 pipe_id = (entry->ring_id & 0x03) >> 0;
4946 switch (me_id) {
4947 case 0:
4948 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4949 break;
4950 case 1:
4951 case 2:
4952 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4953 ring = &adev->gfx.compute_ring[i];
4954 if ((ring->me == me_id) && (ring->pipe == pipe_id))
4955 amdgpu_fence_process(ring);
4956 }
4957 break;
4958 }
4959 return 0;
4960}
4961
/**
 * gfx_v7_0_priv_reg_irq - .process callback of the privileged register fault
 * @adev: amdgpu device pointer
 * @source: interrupt source (unused)
 * @entry: interrupt vector entry (unused)
 *
 * Logs the fault and schedules adev->reset_work.
 *
 * Always returns 0.
 */
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
4970
/**
 * gfx_v7_0_priv_inst_irq - .process callback of the privileged instruction
 * fault
 * @adev: amdgpu device pointer
 * @source: interrupt source (unused)
 * @entry: interrupt vector entry (unused)
 *
 * Logs the fault and schedules adev->reset_work.
 *
 * Always returns 0.
 */
static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	/* NOTE(review): the log does not say which ring faulted - confirm whether entry carries it */
	schedule_work(&adev->reset_work);
	return 0;
}
4980
4981static int gfx_v7_0_set_clockgating_state(void *handle,
4982 enum amd_clockgating_state state)
4983{
4984 bool gate = false;
4985 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4986
4987 if (state == AMD_CG_STATE_GATE)
4988 gate = true;
4989
4990 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4991
4992 if (gate) {
4993 gfx_v7_0_enable_mgcg(adev, true);
4994 gfx_v7_0_enable_cgcg(adev, true);
4995 } else {
4996 gfx_v7_0_enable_cgcg(adev, false);
4997 gfx_v7_0_enable_mgcg(adev, false);
4998 }
4999 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
5000
5001 return 0;
5002}
5003
5004static int gfx_v7_0_set_powergating_state(void *handle,
5005 enum amd_powergating_state state)
5006{
5007 bool gate = false;
5008 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5009
5010 if (state == AMD_PG_STATE_GATE)
5011 gate = true;
5012
5013 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
5014 AMD_PG_SUPPORT_GFX_SMG |
5015 AMD_PG_SUPPORT_GFX_DMG |
5016 AMD_PG_SUPPORT_CP |
5017 AMD_PG_SUPPORT_GDS |
5018 AMD_PG_SUPPORT_RLC_SMU_HS)) {
5019 gfx_v7_0_update_gfx_pg(adev, gate);
5020 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
5021 gfx_v7_0_enable_cp_pg(adev, gate);
5022 gfx_v7_0_enable_gds_pg(adev, gate);
5023 }
5024 }
5025
5026 return 0;
5027}
5028
/*
 * IP block callbacks of the GFX v7 block.  The same table is shared by all
 * gfx_v7_x_ip_block descriptors at the end of this file.
 */
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
	.name = "gfx_v7_0",
	.early_init = gfx_v7_0_early_init,
	.late_init = gfx_v7_0_late_init,
	.sw_init = gfx_v7_0_sw_init,
	.sw_fini = gfx_v7_0_sw_fini,
	.hw_init = gfx_v7_0_hw_init,
	.hw_fini = gfx_v7_0_hw_fini,
	.suspend = gfx_v7_0_suspend,
	.resume = gfx_v7_0_resume,
	.is_idle = gfx_v7_0_is_idle,
	.wait_for_idle = gfx_v7_0_wait_for_idle,
	.soft_reset = gfx_v7_0_soft_reset,
	.set_clockgating_state = gfx_v7_0_set_clockgating_state,
	.set_powergating_state = gfx_v7_0_set_powergating_state,
};
5045
/*
 * Ring callbacks for the gfx ring.
 *
 * emit_frame_size is the sum of the dword budgets of the packets emitted per
 * frame.  The leading 20 matches gfx_v7_0_ring_emit_gds_switch (4 WRITE_DATA
 * packets of 5 dwords each).
 * NOTE(review): the remaining terms presumably cover, in order, the HDP
 * flush, HDP invalidate, three fences, pipeline sync, VM flush and context
 * control - confirm against those emit functions before changing any of them.
 */
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v7_0_ring_get_rptr,
	.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v7_0_ring_emit_gds_switch */
		7 +
		5 +
		12 + 12 + 12 +
		7 + 4 +
		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 +
		3 + 4,
	.emit_ib_size = 4,
	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
	.test_ring = gfx_v7_0_ring_test_ring,
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
	.soft_recovery = gfx_v7_0_ring_soft_recovery,
};
5077
/*
 * Ring callbacks for the compute rings.  Compared with the gfx table there is
 * no emit_cntxcntl and no soft_recovery, and the IB/fence/wptr callbacks are
 * the compute variants.
 *
 * emit_frame_size is the sum of the dword budgets of the packets emitted per
 * frame.  The leading 20 matches gfx_v7_0_ring_emit_gds_switch (4 WRITE_DATA
 * packets of 5 dwords each).
 * NOTE(review): the remaining terms presumably cover, in order, the HDP
 * flush, HDP invalidate, pipeline sync, VM flush and three compute fences -
 * confirm against those emit functions before changing any of them.
 */
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v7_0_ring_get_rptr,
	.get_wptr = gfx_v7_0_ring_get_wptr_compute,
	.set_wptr = gfx_v7_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v7_0_ring_emit_gds_switch */
		7 +
		5 +
		7 +
		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 +
		7 + 7 + 7,
	.emit_ib_size = 4,
	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
	.test_ring = gfx_v7_0_ring_test_ring,
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
};
5106
5107static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
5108{
5109 int i;
5110
5111 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5112 adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
5113 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5114 adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
5115}
5116
/* callbacks of the end-of-pipe interrupt source (gfx and compute rings) */
static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
	.set = gfx_v7_0_set_eop_interrupt_state,
	.process = gfx_v7_0_eop_irq,
};
5121
/* callbacks of the privileged register fault interrupt source */
static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
	.set = gfx_v7_0_set_priv_reg_fault_state,
	.process = gfx_v7_0_priv_reg_irq,
};
5126
/* callbacks of the privileged instruction fault interrupt source */
static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
	.set = gfx_v7_0_set_priv_inst_fault_state,
	.process = gfx_v7_0_priv_inst_irq,
};
5131
5132static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
5133{
5134 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5135 adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
5136
5137 adev->gfx.priv_reg_irq.num_types = 1;
5138 adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
5139
5140 adev->gfx.priv_inst_irq.num_types = 1;
5141 adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
5142}
5143
5144static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5145{
5146
5147 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5148 adev->gds.gws.total_size = 64;
5149 adev->gds.oa.total_size = 16;
5150
5151 if (adev->gds.mem.total_size == 64 * 1024) {
5152 adev->gds.mem.gfx_partition_size = 4096;
5153 adev->gds.mem.cs_partition_size = 4096;
5154
5155 adev->gds.gws.gfx_partition_size = 4;
5156 adev->gds.gws.cs_partition_size = 4;
5157
5158 adev->gds.oa.gfx_partition_size = 4;
5159 adev->gds.oa.cs_partition_size = 1;
5160 } else {
5161 adev->gds.mem.gfx_partition_size = 1024;
5162 adev->gds.mem.cs_partition_size = 1024;
5163
5164 adev->gds.gws.gfx_partition_size = 16;
5165 adev->gds.gws.cs_partition_size = 16;
5166
5167 adev->gds.oa.gfx_partition_size = 4;
5168 adev->gds.oa.cs_partition_size = 4;
5169 }
5170}
5171
5172
5173static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
5174{
5175 int i, j, k, counter, active_cu_number = 0;
5176 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5177 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
5178 unsigned disable_masks[4 * 2];
5179 u32 ao_cu_num;
5180
5181 if (adev->flags & AMD_IS_APU)
5182 ao_cu_num = 2;
5183 else
5184 ao_cu_num = adev->gfx.config.max_cu_per_sh;
5185
5186 memset(cu_info, 0, sizeof(*cu_info));
5187
5188 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5189
5190 mutex_lock(&adev->grbm_idx_mutex);
5191 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5192 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5193 mask = 1;
5194 ao_bitmap = 0;
5195 counter = 0;
5196 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
5197 if (i < 4 && j < 2)
5198 gfx_v7_0_set_user_cu_inactive_bitmap(
5199 adev, disable_masks[i * 2 + j]);
5200 bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
5201 cu_info->bitmap[i][j] = bitmap;
5202
5203 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5204 if (bitmap & mask) {
5205 if (counter < ao_cu_num)
5206 ao_bitmap |= mask;
5207 counter ++;
5208 }
5209 mask <<= 1;
5210 }
5211 active_cu_number += counter;
5212 if (i < 2 && j < 2)
5213 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5214 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5215 }
5216 }
5217 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5218 mutex_unlock(&adev->grbm_idx_mutex);
5219
5220 cu_info->number = active_cu_number;
5221 cu_info->ao_cu_mask = ao_cu_mask;
5222 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5223 cu_info->max_waves_per_simd = 10;
5224 cu_info->max_scratch_slots_per_cu = 32;
5225 cu_info->wave_front_size = 64;
5226 cu_info->lds_size = 64;
5227}
5228
/* GFX IP block descriptor, version 7.0.0; uses the shared gfx_v7_0_ip_funcs */
const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5237
/* GFX IP block descriptor, version 7.1.0; uses the shared gfx_v7_0_ip_funcs */
const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5246
/* GFX IP block descriptor, version 7.2.0; uses the shared gfx_v7_0_ip_funcs */
const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 2,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5255
/* GFX IP block descriptor, version 7.3.0; uses the shared gfx_v7_0_ip_funcs */
const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 3,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5264