1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/firmware.h>
24#include <drm/drmP.h>
25#include "amdgpu.h"
26#include "amdgpu_ih.h"
27#include "amdgpu_gfx.h"
28#include "cikd.h"
29#include "cik.h"
30#include "cik_structs.h"
31#include "atom.h"
32#include "amdgpu_ucode.h"
33#include "clearstate_ci.h"
34
35#include "dce/dce_8_0_d.h"
36#include "dce/dce_8_0_sh_mask.h"
37
38#include "bif/bif_4_1_d.h"
39#include "bif/bif_4_1_sh_mask.h"
40
41#include "gca/gfx_7_0_d.h"
42#include "gca/gfx_7_2_enum.h"
43#include "gca/gfx_7_2_sh_mask.h"
44
45#include "gmc/gmc_7_0_d.h"
46#include "gmc/gmc_7_0_sh_mask.h"
47
48#include "oss/oss_2_0_d.h"
49#include "oss/oss_2_0_sh_mask.h"
50
/* Ring count constant for GFX7 (name suggests the number of gfx rings). */
#define GFX7_NUM_GFX_RINGS	1
/* NOTE(review): presumably the MEC HPD size in bytes - confirm at the users. */
#define GFX7_MEC_HPD_SIZE	2048

/*
 * Prototypes for file-local setup helpers; their definitions are not in
 * this part of the file.
 */
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
57
58MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
59MODULE_FIRMWARE("radeon/bonaire_me.bin");
60MODULE_FIRMWARE("radeon/bonaire_ce.bin");
61MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
62MODULE_FIRMWARE("radeon/bonaire_mec.bin");
63
64MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
65MODULE_FIRMWARE("radeon/hawaii_me.bin");
66MODULE_FIRMWARE("radeon/hawaii_ce.bin");
67MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
68MODULE_FIRMWARE("radeon/hawaii_mec.bin");
69
70MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
71MODULE_FIRMWARE("radeon/kaveri_me.bin");
72MODULE_FIRMWARE("radeon/kaveri_ce.bin");
73MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
74MODULE_FIRMWARE("radeon/kaveri_mec.bin");
75MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
76
77MODULE_FIRMWARE("radeon/kabini_pfp.bin");
78MODULE_FIRMWARE("radeon/kabini_me.bin");
79MODULE_FIRMWARE("radeon/kabini_ce.bin");
80MODULE_FIRMWARE("radeon/kabini_rlc.bin");
81MODULE_FIRMWARE("radeon/kabini_mec.bin");
82
83MODULE_FIRMWARE("radeon/mullins_pfp.bin");
84MODULE_FIRMWARE("radeon/mullins_me.bin");
85MODULE_FIRMWARE("radeon/mullins_ce.bin");
86MODULE_FIRMWARE("radeon/mullins_rlc.bin");
87MODULE_FIRMWARE("radeon/mullins_mec.bin");
88
89static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
90{
91 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
92 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
93 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
94 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
95 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
96 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
97 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
98 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
99 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
100 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
101 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
102 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
103 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
104 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
105 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
106 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
107};
108
109static const u32 spectre_rlc_save_restore_register_list[] =
110{
111 (0x0e00 << 16) | (0xc12c >> 2),
112 0x00000000,
113 (0x0e00 << 16) | (0xc140 >> 2),
114 0x00000000,
115 (0x0e00 << 16) | (0xc150 >> 2),
116 0x00000000,
117 (0x0e00 << 16) | (0xc15c >> 2),
118 0x00000000,
119 (0x0e00 << 16) | (0xc168 >> 2),
120 0x00000000,
121 (0x0e00 << 16) | (0xc170 >> 2),
122 0x00000000,
123 (0x0e00 << 16) | (0xc178 >> 2),
124 0x00000000,
125 (0x0e00 << 16) | (0xc204 >> 2),
126 0x00000000,
127 (0x0e00 << 16) | (0xc2b4 >> 2),
128 0x00000000,
129 (0x0e00 << 16) | (0xc2b8 >> 2),
130 0x00000000,
131 (0x0e00 << 16) | (0xc2bc >> 2),
132 0x00000000,
133 (0x0e00 << 16) | (0xc2c0 >> 2),
134 0x00000000,
135 (0x0e00 << 16) | (0x8228 >> 2),
136 0x00000000,
137 (0x0e00 << 16) | (0x829c >> 2),
138 0x00000000,
139 (0x0e00 << 16) | (0x869c >> 2),
140 0x00000000,
141 (0x0600 << 16) | (0x98f4 >> 2),
142 0x00000000,
143 (0x0e00 << 16) | (0x98f8 >> 2),
144 0x00000000,
145 (0x0e00 << 16) | (0x9900 >> 2),
146 0x00000000,
147 (0x0e00 << 16) | (0xc260 >> 2),
148 0x00000000,
149 (0x0e00 << 16) | (0x90e8 >> 2),
150 0x00000000,
151 (0x0e00 << 16) | (0x3c000 >> 2),
152 0x00000000,
153 (0x0e00 << 16) | (0x3c00c >> 2),
154 0x00000000,
155 (0x0e00 << 16) | (0x8c1c >> 2),
156 0x00000000,
157 (0x0e00 << 16) | (0x9700 >> 2),
158 0x00000000,
159 (0x0e00 << 16) | (0xcd20 >> 2),
160 0x00000000,
161 (0x4e00 << 16) | (0xcd20 >> 2),
162 0x00000000,
163 (0x5e00 << 16) | (0xcd20 >> 2),
164 0x00000000,
165 (0x6e00 << 16) | (0xcd20 >> 2),
166 0x00000000,
167 (0x7e00 << 16) | (0xcd20 >> 2),
168 0x00000000,
169 (0x8e00 << 16) | (0xcd20 >> 2),
170 0x00000000,
171 (0x9e00 << 16) | (0xcd20 >> 2),
172 0x00000000,
173 (0xae00 << 16) | (0xcd20 >> 2),
174 0x00000000,
175 (0xbe00 << 16) | (0xcd20 >> 2),
176 0x00000000,
177 (0x0e00 << 16) | (0x89bc >> 2),
178 0x00000000,
179 (0x0e00 << 16) | (0x8900 >> 2),
180 0x00000000,
181 0x3,
182 (0x0e00 << 16) | (0xc130 >> 2),
183 0x00000000,
184 (0x0e00 << 16) | (0xc134 >> 2),
185 0x00000000,
186 (0x0e00 << 16) | (0xc1fc >> 2),
187 0x00000000,
188 (0x0e00 << 16) | (0xc208 >> 2),
189 0x00000000,
190 (0x0e00 << 16) | (0xc264 >> 2),
191 0x00000000,
192 (0x0e00 << 16) | (0xc268 >> 2),
193 0x00000000,
194 (0x0e00 << 16) | (0xc26c >> 2),
195 0x00000000,
196 (0x0e00 << 16) | (0xc270 >> 2),
197 0x00000000,
198 (0x0e00 << 16) | (0xc274 >> 2),
199 0x00000000,
200 (0x0e00 << 16) | (0xc278 >> 2),
201 0x00000000,
202 (0x0e00 << 16) | (0xc27c >> 2),
203 0x00000000,
204 (0x0e00 << 16) | (0xc280 >> 2),
205 0x00000000,
206 (0x0e00 << 16) | (0xc284 >> 2),
207 0x00000000,
208 (0x0e00 << 16) | (0xc288 >> 2),
209 0x00000000,
210 (0x0e00 << 16) | (0xc28c >> 2),
211 0x00000000,
212 (0x0e00 << 16) | (0xc290 >> 2),
213 0x00000000,
214 (0x0e00 << 16) | (0xc294 >> 2),
215 0x00000000,
216 (0x0e00 << 16) | (0xc298 >> 2),
217 0x00000000,
218 (0x0e00 << 16) | (0xc29c >> 2),
219 0x00000000,
220 (0x0e00 << 16) | (0xc2a0 >> 2),
221 0x00000000,
222 (0x0e00 << 16) | (0xc2a4 >> 2),
223 0x00000000,
224 (0x0e00 << 16) | (0xc2a8 >> 2),
225 0x00000000,
226 (0x0e00 << 16) | (0xc2ac >> 2),
227 0x00000000,
228 (0x0e00 << 16) | (0xc2b0 >> 2),
229 0x00000000,
230 (0x0e00 << 16) | (0x301d0 >> 2),
231 0x00000000,
232 (0x0e00 << 16) | (0x30238 >> 2),
233 0x00000000,
234 (0x0e00 << 16) | (0x30250 >> 2),
235 0x00000000,
236 (0x0e00 << 16) | (0x30254 >> 2),
237 0x00000000,
238 (0x0e00 << 16) | (0x30258 >> 2),
239 0x00000000,
240 (0x0e00 << 16) | (0x3025c >> 2),
241 0x00000000,
242 (0x4e00 << 16) | (0xc900 >> 2),
243 0x00000000,
244 (0x5e00 << 16) | (0xc900 >> 2),
245 0x00000000,
246 (0x6e00 << 16) | (0xc900 >> 2),
247 0x00000000,
248 (0x7e00 << 16) | (0xc900 >> 2),
249 0x00000000,
250 (0x8e00 << 16) | (0xc900 >> 2),
251 0x00000000,
252 (0x9e00 << 16) | (0xc900 >> 2),
253 0x00000000,
254 (0xae00 << 16) | (0xc900 >> 2),
255 0x00000000,
256 (0xbe00 << 16) | (0xc900 >> 2),
257 0x00000000,
258 (0x4e00 << 16) | (0xc904 >> 2),
259 0x00000000,
260 (0x5e00 << 16) | (0xc904 >> 2),
261 0x00000000,
262 (0x6e00 << 16) | (0xc904 >> 2),
263 0x00000000,
264 (0x7e00 << 16) | (0xc904 >> 2),
265 0x00000000,
266 (0x8e00 << 16) | (0xc904 >> 2),
267 0x00000000,
268 (0x9e00 << 16) | (0xc904 >> 2),
269 0x00000000,
270 (0xae00 << 16) | (0xc904 >> 2),
271 0x00000000,
272 (0xbe00 << 16) | (0xc904 >> 2),
273 0x00000000,
274 (0x4e00 << 16) | (0xc908 >> 2),
275 0x00000000,
276 (0x5e00 << 16) | (0xc908 >> 2),
277 0x00000000,
278 (0x6e00 << 16) | (0xc908 >> 2),
279 0x00000000,
280 (0x7e00 << 16) | (0xc908 >> 2),
281 0x00000000,
282 (0x8e00 << 16) | (0xc908 >> 2),
283 0x00000000,
284 (0x9e00 << 16) | (0xc908 >> 2),
285 0x00000000,
286 (0xae00 << 16) | (0xc908 >> 2),
287 0x00000000,
288 (0xbe00 << 16) | (0xc908 >> 2),
289 0x00000000,
290 (0x4e00 << 16) | (0xc90c >> 2),
291 0x00000000,
292 (0x5e00 << 16) | (0xc90c >> 2),
293 0x00000000,
294 (0x6e00 << 16) | (0xc90c >> 2),
295 0x00000000,
296 (0x7e00 << 16) | (0xc90c >> 2),
297 0x00000000,
298 (0x8e00 << 16) | (0xc90c >> 2),
299 0x00000000,
300 (0x9e00 << 16) | (0xc90c >> 2),
301 0x00000000,
302 (0xae00 << 16) | (0xc90c >> 2),
303 0x00000000,
304 (0xbe00 << 16) | (0xc90c >> 2),
305 0x00000000,
306 (0x4e00 << 16) | (0xc910 >> 2),
307 0x00000000,
308 (0x5e00 << 16) | (0xc910 >> 2),
309 0x00000000,
310 (0x6e00 << 16) | (0xc910 >> 2),
311 0x00000000,
312 (0x7e00 << 16) | (0xc910 >> 2),
313 0x00000000,
314 (0x8e00 << 16) | (0xc910 >> 2),
315 0x00000000,
316 (0x9e00 << 16) | (0xc910 >> 2),
317 0x00000000,
318 (0xae00 << 16) | (0xc910 >> 2),
319 0x00000000,
320 (0xbe00 << 16) | (0xc910 >> 2),
321 0x00000000,
322 (0x0e00 << 16) | (0xc99c >> 2),
323 0x00000000,
324 (0x0e00 << 16) | (0x9834 >> 2),
325 0x00000000,
326 (0x0000 << 16) | (0x30f00 >> 2),
327 0x00000000,
328 (0x0001 << 16) | (0x30f00 >> 2),
329 0x00000000,
330 (0x0000 << 16) | (0x30f04 >> 2),
331 0x00000000,
332 (0x0001 << 16) | (0x30f04 >> 2),
333 0x00000000,
334 (0x0000 << 16) | (0x30f08 >> 2),
335 0x00000000,
336 (0x0001 << 16) | (0x30f08 >> 2),
337 0x00000000,
338 (0x0000 << 16) | (0x30f0c >> 2),
339 0x00000000,
340 (0x0001 << 16) | (0x30f0c >> 2),
341 0x00000000,
342 (0x0600 << 16) | (0x9b7c >> 2),
343 0x00000000,
344 (0x0e00 << 16) | (0x8a14 >> 2),
345 0x00000000,
346 (0x0e00 << 16) | (0x8a18 >> 2),
347 0x00000000,
348 (0x0600 << 16) | (0x30a00 >> 2),
349 0x00000000,
350 (0x0e00 << 16) | (0x8bf0 >> 2),
351 0x00000000,
352 (0x0e00 << 16) | (0x8bcc >> 2),
353 0x00000000,
354 (0x0e00 << 16) | (0x8b24 >> 2),
355 0x00000000,
356 (0x0e00 << 16) | (0x30a04 >> 2),
357 0x00000000,
358 (0x0600 << 16) | (0x30a10 >> 2),
359 0x00000000,
360 (0x0600 << 16) | (0x30a14 >> 2),
361 0x00000000,
362 (0x0600 << 16) | (0x30a18 >> 2),
363 0x00000000,
364 (0x0600 << 16) | (0x30a2c >> 2),
365 0x00000000,
366 (0x0e00 << 16) | (0xc700 >> 2),
367 0x00000000,
368 (0x0e00 << 16) | (0xc704 >> 2),
369 0x00000000,
370 (0x0e00 << 16) | (0xc708 >> 2),
371 0x00000000,
372 (0x0e00 << 16) | (0xc768 >> 2),
373 0x00000000,
374 (0x0400 << 16) | (0xc770 >> 2),
375 0x00000000,
376 (0x0400 << 16) | (0xc774 >> 2),
377 0x00000000,
378 (0x0400 << 16) | (0xc778 >> 2),
379 0x00000000,
380 (0x0400 << 16) | (0xc77c >> 2),
381 0x00000000,
382 (0x0400 << 16) | (0xc780 >> 2),
383 0x00000000,
384 (0x0400 << 16) | (0xc784 >> 2),
385 0x00000000,
386 (0x0400 << 16) | (0xc788 >> 2),
387 0x00000000,
388 (0x0400 << 16) | (0xc78c >> 2),
389 0x00000000,
390 (0x0400 << 16) | (0xc798 >> 2),
391 0x00000000,
392 (0x0400 << 16) | (0xc79c >> 2),
393 0x00000000,
394 (0x0400 << 16) | (0xc7a0 >> 2),
395 0x00000000,
396 (0x0400 << 16) | (0xc7a4 >> 2),
397 0x00000000,
398 (0x0400 << 16) | (0xc7a8 >> 2),
399 0x00000000,
400 (0x0400 << 16) | (0xc7ac >> 2),
401 0x00000000,
402 (0x0400 << 16) | (0xc7b0 >> 2),
403 0x00000000,
404 (0x0400 << 16) | (0xc7b4 >> 2),
405 0x00000000,
406 (0x0e00 << 16) | (0x9100 >> 2),
407 0x00000000,
408 (0x0e00 << 16) | (0x3c010 >> 2),
409 0x00000000,
410 (0x0e00 << 16) | (0x92a8 >> 2),
411 0x00000000,
412 (0x0e00 << 16) | (0x92ac >> 2),
413 0x00000000,
414 (0x0e00 << 16) | (0x92b4 >> 2),
415 0x00000000,
416 (0x0e00 << 16) | (0x92b8 >> 2),
417 0x00000000,
418 (0x0e00 << 16) | (0x92bc >> 2),
419 0x00000000,
420 (0x0e00 << 16) | (0x92c0 >> 2),
421 0x00000000,
422 (0x0e00 << 16) | (0x92c4 >> 2),
423 0x00000000,
424 (0x0e00 << 16) | (0x92c8 >> 2),
425 0x00000000,
426 (0x0e00 << 16) | (0x92cc >> 2),
427 0x00000000,
428 (0x0e00 << 16) | (0x92d0 >> 2),
429 0x00000000,
430 (0x0e00 << 16) | (0x8c00 >> 2),
431 0x00000000,
432 (0x0e00 << 16) | (0x8c04 >> 2),
433 0x00000000,
434 (0x0e00 << 16) | (0x8c20 >> 2),
435 0x00000000,
436 (0x0e00 << 16) | (0x8c38 >> 2),
437 0x00000000,
438 (0x0e00 << 16) | (0x8c3c >> 2),
439 0x00000000,
440 (0x0e00 << 16) | (0xae00 >> 2),
441 0x00000000,
442 (0x0e00 << 16) | (0x9604 >> 2),
443 0x00000000,
444 (0x0e00 << 16) | (0xac08 >> 2),
445 0x00000000,
446 (0x0e00 << 16) | (0xac0c >> 2),
447 0x00000000,
448 (0x0e00 << 16) | (0xac10 >> 2),
449 0x00000000,
450 (0x0e00 << 16) | (0xac14 >> 2),
451 0x00000000,
452 (0x0e00 << 16) | (0xac58 >> 2),
453 0x00000000,
454 (0x0e00 << 16) | (0xac68 >> 2),
455 0x00000000,
456 (0x0e00 << 16) | (0xac6c >> 2),
457 0x00000000,
458 (0x0e00 << 16) | (0xac70 >> 2),
459 0x00000000,
460 (0x0e00 << 16) | (0xac74 >> 2),
461 0x00000000,
462 (0x0e00 << 16) | (0xac78 >> 2),
463 0x00000000,
464 (0x0e00 << 16) | (0xac7c >> 2),
465 0x00000000,
466 (0x0e00 << 16) | (0xac80 >> 2),
467 0x00000000,
468 (0x0e00 << 16) | (0xac84 >> 2),
469 0x00000000,
470 (0x0e00 << 16) | (0xac88 >> 2),
471 0x00000000,
472 (0x0e00 << 16) | (0xac8c >> 2),
473 0x00000000,
474 (0x0e00 << 16) | (0x970c >> 2),
475 0x00000000,
476 (0x0e00 << 16) | (0x9714 >> 2),
477 0x00000000,
478 (0x0e00 << 16) | (0x9718 >> 2),
479 0x00000000,
480 (0x0e00 << 16) | (0x971c >> 2),
481 0x00000000,
482 (0x0e00 << 16) | (0x31068 >> 2),
483 0x00000000,
484 (0x4e00 << 16) | (0x31068 >> 2),
485 0x00000000,
486 (0x5e00 << 16) | (0x31068 >> 2),
487 0x00000000,
488 (0x6e00 << 16) | (0x31068 >> 2),
489 0x00000000,
490 (0x7e00 << 16) | (0x31068 >> 2),
491 0x00000000,
492 (0x8e00 << 16) | (0x31068 >> 2),
493 0x00000000,
494 (0x9e00 << 16) | (0x31068 >> 2),
495 0x00000000,
496 (0xae00 << 16) | (0x31068 >> 2),
497 0x00000000,
498 (0xbe00 << 16) | (0x31068 >> 2),
499 0x00000000,
500 (0x0e00 << 16) | (0xcd10 >> 2),
501 0x00000000,
502 (0x0e00 << 16) | (0xcd14 >> 2),
503 0x00000000,
504 (0x0e00 << 16) | (0x88b0 >> 2),
505 0x00000000,
506 (0x0e00 << 16) | (0x88b4 >> 2),
507 0x00000000,
508 (0x0e00 << 16) | (0x88b8 >> 2),
509 0x00000000,
510 (0x0e00 << 16) | (0x88bc >> 2),
511 0x00000000,
512 (0x0400 << 16) | (0x89c0 >> 2),
513 0x00000000,
514 (0x0e00 << 16) | (0x88c4 >> 2),
515 0x00000000,
516 (0x0e00 << 16) | (0x88c8 >> 2),
517 0x00000000,
518 (0x0e00 << 16) | (0x88d0 >> 2),
519 0x00000000,
520 (0x0e00 << 16) | (0x88d4 >> 2),
521 0x00000000,
522 (0x0e00 << 16) | (0x88d8 >> 2),
523 0x00000000,
524 (0x0e00 << 16) | (0x8980 >> 2),
525 0x00000000,
526 (0x0e00 << 16) | (0x30938 >> 2),
527 0x00000000,
528 (0x0e00 << 16) | (0x3093c >> 2),
529 0x00000000,
530 (0x0e00 << 16) | (0x30940 >> 2),
531 0x00000000,
532 (0x0e00 << 16) | (0x89a0 >> 2),
533 0x00000000,
534 (0x0e00 << 16) | (0x30900 >> 2),
535 0x00000000,
536 (0x0e00 << 16) | (0x30904 >> 2),
537 0x00000000,
538 (0x0e00 << 16) | (0x89b4 >> 2),
539 0x00000000,
540 (0x0e00 << 16) | (0x3c210 >> 2),
541 0x00000000,
542 (0x0e00 << 16) | (0x3c214 >> 2),
543 0x00000000,
544 (0x0e00 << 16) | (0x3c218 >> 2),
545 0x00000000,
546 (0x0e00 << 16) | (0x8904 >> 2),
547 0x00000000,
548 0x5,
549 (0x0e00 << 16) | (0x8c28 >> 2),
550 (0x0e00 << 16) | (0x8c2c >> 2),
551 (0x0e00 << 16) | (0x8c30 >> 2),
552 (0x0e00 << 16) | (0x8c34 >> 2),
553 (0x0e00 << 16) | (0x9600 >> 2),
554};
555
556static const u32 kalindi_rlc_save_restore_register_list[] =
557{
558 (0x0e00 << 16) | (0xc12c >> 2),
559 0x00000000,
560 (0x0e00 << 16) | (0xc140 >> 2),
561 0x00000000,
562 (0x0e00 << 16) | (0xc150 >> 2),
563 0x00000000,
564 (0x0e00 << 16) | (0xc15c >> 2),
565 0x00000000,
566 (0x0e00 << 16) | (0xc168 >> 2),
567 0x00000000,
568 (0x0e00 << 16) | (0xc170 >> 2),
569 0x00000000,
570 (0x0e00 << 16) | (0xc204 >> 2),
571 0x00000000,
572 (0x0e00 << 16) | (0xc2b4 >> 2),
573 0x00000000,
574 (0x0e00 << 16) | (0xc2b8 >> 2),
575 0x00000000,
576 (0x0e00 << 16) | (0xc2bc >> 2),
577 0x00000000,
578 (0x0e00 << 16) | (0xc2c0 >> 2),
579 0x00000000,
580 (0x0e00 << 16) | (0x8228 >> 2),
581 0x00000000,
582 (0x0e00 << 16) | (0x829c >> 2),
583 0x00000000,
584 (0x0e00 << 16) | (0x869c >> 2),
585 0x00000000,
586 (0x0600 << 16) | (0x98f4 >> 2),
587 0x00000000,
588 (0x0e00 << 16) | (0x98f8 >> 2),
589 0x00000000,
590 (0x0e00 << 16) | (0x9900 >> 2),
591 0x00000000,
592 (0x0e00 << 16) | (0xc260 >> 2),
593 0x00000000,
594 (0x0e00 << 16) | (0x90e8 >> 2),
595 0x00000000,
596 (0x0e00 << 16) | (0x3c000 >> 2),
597 0x00000000,
598 (0x0e00 << 16) | (0x3c00c >> 2),
599 0x00000000,
600 (0x0e00 << 16) | (0x8c1c >> 2),
601 0x00000000,
602 (0x0e00 << 16) | (0x9700 >> 2),
603 0x00000000,
604 (0x0e00 << 16) | (0xcd20 >> 2),
605 0x00000000,
606 (0x4e00 << 16) | (0xcd20 >> 2),
607 0x00000000,
608 (0x5e00 << 16) | (0xcd20 >> 2),
609 0x00000000,
610 (0x6e00 << 16) | (0xcd20 >> 2),
611 0x00000000,
612 (0x7e00 << 16) | (0xcd20 >> 2),
613 0x00000000,
614 (0x0e00 << 16) | (0x89bc >> 2),
615 0x00000000,
616 (0x0e00 << 16) | (0x8900 >> 2),
617 0x00000000,
618 0x3,
619 (0x0e00 << 16) | (0xc130 >> 2),
620 0x00000000,
621 (0x0e00 << 16) | (0xc134 >> 2),
622 0x00000000,
623 (0x0e00 << 16) | (0xc1fc >> 2),
624 0x00000000,
625 (0x0e00 << 16) | (0xc208 >> 2),
626 0x00000000,
627 (0x0e00 << 16) | (0xc264 >> 2),
628 0x00000000,
629 (0x0e00 << 16) | (0xc268 >> 2),
630 0x00000000,
631 (0x0e00 << 16) | (0xc26c >> 2),
632 0x00000000,
633 (0x0e00 << 16) | (0xc270 >> 2),
634 0x00000000,
635 (0x0e00 << 16) | (0xc274 >> 2),
636 0x00000000,
637 (0x0e00 << 16) | (0xc28c >> 2),
638 0x00000000,
639 (0x0e00 << 16) | (0xc290 >> 2),
640 0x00000000,
641 (0x0e00 << 16) | (0xc294 >> 2),
642 0x00000000,
643 (0x0e00 << 16) | (0xc298 >> 2),
644 0x00000000,
645 (0x0e00 << 16) | (0xc2a0 >> 2),
646 0x00000000,
647 (0x0e00 << 16) | (0xc2a4 >> 2),
648 0x00000000,
649 (0x0e00 << 16) | (0xc2a8 >> 2),
650 0x00000000,
651 (0x0e00 << 16) | (0xc2ac >> 2),
652 0x00000000,
653 (0x0e00 << 16) | (0x301d0 >> 2),
654 0x00000000,
655 (0x0e00 << 16) | (0x30238 >> 2),
656 0x00000000,
657 (0x0e00 << 16) | (0x30250 >> 2),
658 0x00000000,
659 (0x0e00 << 16) | (0x30254 >> 2),
660 0x00000000,
661 (0x0e00 << 16) | (0x30258 >> 2),
662 0x00000000,
663 (0x0e00 << 16) | (0x3025c >> 2),
664 0x00000000,
665 (0x4e00 << 16) | (0xc900 >> 2),
666 0x00000000,
667 (0x5e00 << 16) | (0xc900 >> 2),
668 0x00000000,
669 (0x6e00 << 16) | (0xc900 >> 2),
670 0x00000000,
671 (0x7e00 << 16) | (0xc900 >> 2),
672 0x00000000,
673 (0x4e00 << 16) | (0xc904 >> 2),
674 0x00000000,
675 (0x5e00 << 16) | (0xc904 >> 2),
676 0x00000000,
677 (0x6e00 << 16) | (0xc904 >> 2),
678 0x00000000,
679 (0x7e00 << 16) | (0xc904 >> 2),
680 0x00000000,
681 (0x4e00 << 16) | (0xc908 >> 2),
682 0x00000000,
683 (0x5e00 << 16) | (0xc908 >> 2),
684 0x00000000,
685 (0x6e00 << 16) | (0xc908 >> 2),
686 0x00000000,
687 (0x7e00 << 16) | (0xc908 >> 2),
688 0x00000000,
689 (0x4e00 << 16) | (0xc90c >> 2),
690 0x00000000,
691 (0x5e00 << 16) | (0xc90c >> 2),
692 0x00000000,
693 (0x6e00 << 16) | (0xc90c >> 2),
694 0x00000000,
695 (0x7e00 << 16) | (0xc90c >> 2),
696 0x00000000,
697 (0x4e00 << 16) | (0xc910 >> 2),
698 0x00000000,
699 (0x5e00 << 16) | (0xc910 >> 2),
700 0x00000000,
701 (0x6e00 << 16) | (0xc910 >> 2),
702 0x00000000,
703 (0x7e00 << 16) | (0xc910 >> 2),
704 0x00000000,
705 (0x0e00 << 16) | (0xc99c >> 2),
706 0x00000000,
707 (0x0e00 << 16) | (0x9834 >> 2),
708 0x00000000,
709 (0x0000 << 16) | (0x30f00 >> 2),
710 0x00000000,
711 (0x0000 << 16) | (0x30f04 >> 2),
712 0x00000000,
713 (0x0000 << 16) | (0x30f08 >> 2),
714 0x00000000,
715 (0x0000 << 16) | (0x30f0c >> 2),
716 0x00000000,
717 (0x0600 << 16) | (0x9b7c >> 2),
718 0x00000000,
719 (0x0e00 << 16) | (0x8a14 >> 2),
720 0x00000000,
721 (0x0e00 << 16) | (0x8a18 >> 2),
722 0x00000000,
723 (0x0600 << 16) | (0x30a00 >> 2),
724 0x00000000,
725 (0x0e00 << 16) | (0x8bf0 >> 2),
726 0x00000000,
727 (0x0e00 << 16) | (0x8bcc >> 2),
728 0x00000000,
729 (0x0e00 << 16) | (0x8b24 >> 2),
730 0x00000000,
731 (0x0e00 << 16) | (0x30a04 >> 2),
732 0x00000000,
733 (0x0600 << 16) | (0x30a10 >> 2),
734 0x00000000,
735 (0x0600 << 16) | (0x30a14 >> 2),
736 0x00000000,
737 (0x0600 << 16) | (0x30a18 >> 2),
738 0x00000000,
739 (0x0600 << 16) | (0x30a2c >> 2),
740 0x00000000,
741 (0x0e00 << 16) | (0xc700 >> 2),
742 0x00000000,
743 (0x0e00 << 16) | (0xc704 >> 2),
744 0x00000000,
745 (0x0e00 << 16) | (0xc708 >> 2),
746 0x00000000,
747 (0x0e00 << 16) | (0xc768 >> 2),
748 0x00000000,
749 (0x0400 << 16) | (0xc770 >> 2),
750 0x00000000,
751 (0x0400 << 16) | (0xc774 >> 2),
752 0x00000000,
753 (0x0400 << 16) | (0xc798 >> 2),
754 0x00000000,
755 (0x0400 << 16) | (0xc79c >> 2),
756 0x00000000,
757 (0x0e00 << 16) | (0x9100 >> 2),
758 0x00000000,
759 (0x0e00 << 16) | (0x3c010 >> 2),
760 0x00000000,
761 (0x0e00 << 16) | (0x8c00 >> 2),
762 0x00000000,
763 (0x0e00 << 16) | (0x8c04 >> 2),
764 0x00000000,
765 (0x0e00 << 16) | (0x8c20 >> 2),
766 0x00000000,
767 (0x0e00 << 16) | (0x8c38 >> 2),
768 0x00000000,
769 (0x0e00 << 16) | (0x8c3c >> 2),
770 0x00000000,
771 (0x0e00 << 16) | (0xae00 >> 2),
772 0x00000000,
773 (0x0e00 << 16) | (0x9604 >> 2),
774 0x00000000,
775 (0x0e00 << 16) | (0xac08 >> 2),
776 0x00000000,
777 (0x0e00 << 16) | (0xac0c >> 2),
778 0x00000000,
779 (0x0e00 << 16) | (0xac10 >> 2),
780 0x00000000,
781 (0x0e00 << 16) | (0xac14 >> 2),
782 0x00000000,
783 (0x0e00 << 16) | (0xac58 >> 2),
784 0x00000000,
785 (0x0e00 << 16) | (0xac68 >> 2),
786 0x00000000,
787 (0x0e00 << 16) | (0xac6c >> 2),
788 0x00000000,
789 (0x0e00 << 16) | (0xac70 >> 2),
790 0x00000000,
791 (0x0e00 << 16) | (0xac74 >> 2),
792 0x00000000,
793 (0x0e00 << 16) | (0xac78 >> 2),
794 0x00000000,
795 (0x0e00 << 16) | (0xac7c >> 2),
796 0x00000000,
797 (0x0e00 << 16) | (0xac80 >> 2),
798 0x00000000,
799 (0x0e00 << 16) | (0xac84 >> 2),
800 0x00000000,
801 (0x0e00 << 16) | (0xac88 >> 2),
802 0x00000000,
803 (0x0e00 << 16) | (0xac8c >> 2),
804 0x00000000,
805 (0x0e00 << 16) | (0x970c >> 2),
806 0x00000000,
807 (0x0e00 << 16) | (0x9714 >> 2),
808 0x00000000,
809 (0x0e00 << 16) | (0x9718 >> 2),
810 0x00000000,
811 (0x0e00 << 16) | (0x971c >> 2),
812 0x00000000,
813 (0x0e00 << 16) | (0x31068 >> 2),
814 0x00000000,
815 (0x4e00 << 16) | (0x31068 >> 2),
816 0x00000000,
817 (0x5e00 << 16) | (0x31068 >> 2),
818 0x00000000,
819 (0x6e00 << 16) | (0x31068 >> 2),
820 0x00000000,
821 (0x7e00 << 16) | (0x31068 >> 2),
822 0x00000000,
823 (0x0e00 << 16) | (0xcd10 >> 2),
824 0x00000000,
825 (0x0e00 << 16) | (0xcd14 >> 2),
826 0x00000000,
827 (0x0e00 << 16) | (0x88b0 >> 2),
828 0x00000000,
829 (0x0e00 << 16) | (0x88b4 >> 2),
830 0x00000000,
831 (0x0e00 << 16) | (0x88b8 >> 2),
832 0x00000000,
833 (0x0e00 << 16) | (0x88bc >> 2),
834 0x00000000,
835 (0x0400 << 16) | (0x89c0 >> 2),
836 0x00000000,
837 (0x0e00 << 16) | (0x88c4 >> 2),
838 0x00000000,
839 (0x0e00 << 16) | (0x88c8 >> 2),
840 0x00000000,
841 (0x0e00 << 16) | (0x88d0 >> 2),
842 0x00000000,
843 (0x0e00 << 16) | (0x88d4 >> 2),
844 0x00000000,
845 (0x0e00 << 16) | (0x88d8 >> 2),
846 0x00000000,
847 (0x0e00 << 16) | (0x8980 >> 2),
848 0x00000000,
849 (0x0e00 << 16) | (0x30938 >> 2),
850 0x00000000,
851 (0x0e00 << 16) | (0x3093c >> 2),
852 0x00000000,
853 (0x0e00 << 16) | (0x30940 >> 2),
854 0x00000000,
855 (0x0e00 << 16) | (0x89a0 >> 2),
856 0x00000000,
857 (0x0e00 << 16) | (0x30900 >> 2),
858 0x00000000,
859 (0x0e00 << 16) | (0x30904 >> 2),
860 0x00000000,
861 (0x0e00 << 16) | (0x89b4 >> 2),
862 0x00000000,
863 (0x0e00 << 16) | (0x3e1fc >> 2),
864 0x00000000,
865 (0x0e00 << 16) | (0x3c210 >> 2),
866 0x00000000,
867 (0x0e00 << 16) | (0x3c214 >> 2),
868 0x00000000,
869 (0x0e00 << 16) | (0x3c218 >> 2),
870 0x00000000,
871 (0x0e00 << 16) | (0x8904 >> 2),
872 0x00000000,
873 0x5,
874 (0x0e00 << 16) | (0x8c28 >> 2),
875 (0x0e00 << 16) | (0x8c2c >> 2),
876 (0x0e00 << 16) | (0x8c30 >> 2),
877 (0x0e00 << 16) | (0x8c34 >> 2),
878 (0x0e00 << 16) | (0x9600 >> 2),
879};
880
881static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
882static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
883static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
884static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
885static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
886
887
888
889
890
891
892
893
894
895
896
897
898
899static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
900{
901 const char *chip_name;
902 char fw_name[30];
903 int err;
904
905 DRM_DEBUG("\n");
906
907 switch (adev->asic_type) {
908 case CHIP_BONAIRE:
909 chip_name = "bonaire";
910 break;
911 case CHIP_HAWAII:
912 chip_name = "hawaii";
913 break;
914 case CHIP_KAVERI:
915 chip_name = "kaveri";
916 break;
917 case CHIP_KABINI:
918 chip_name = "kabini";
919 break;
920 case CHIP_MULLINS:
921 chip_name = "mullins";
922 break;
923 default: BUG();
924 }
925
926 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
927 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
928 if (err)
929 goto out;
930 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
931 if (err)
932 goto out;
933
934 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
935 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
936 if (err)
937 goto out;
938 err = amdgpu_ucode_validate(adev->gfx.me_fw);
939 if (err)
940 goto out;
941
942 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
943 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
944 if (err)
945 goto out;
946 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
947 if (err)
948 goto out;
949
950 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
951 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
952 if (err)
953 goto out;
954 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
955 if (err)
956 goto out;
957
958 if (adev->asic_type == CHIP_KAVERI) {
959 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", chip_name);
960 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
961 if (err)
962 goto out;
963 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
964 if (err)
965 goto out;
966 }
967
968 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
969 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
970 if (err)
971 goto out;
972 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
973
974out:
975 if (err) {
976 pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
977 release_firmware(adev->gfx.pfp_fw);
978 adev->gfx.pfp_fw = NULL;
979 release_firmware(adev->gfx.me_fw);
980 adev->gfx.me_fw = NULL;
981 release_firmware(adev->gfx.ce_fw);
982 adev->gfx.ce_fw = NULL;
983 release_firmware(adev->gfx.mec_fw);
984 adev->gfx.mec_fw = NULL;
985 release_firmware(adev->gfx.mec2_fw);
986 adev->gfx.mec2_fw = NULL;
987 release_firmware(adev->gfx.rlc_fw);
988 adev->gfx.rlc_fw = NULL;
989 }
990 return err;
991}
992
993static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
994{
995 release_firmware(adev->gfx.pfp_fw);
996 adev->gfx.pfp_fw = NULL;
997 release_firmware(adev->gfx.me_fw);
998 adev->gfx.me_fw = NULL;
999 release_firmware(adev->gfx.ce_fw);
1000 adev->gfx.ce_fw = NULL;
1001 release_firmware(adev->gfx.mec_fw);
1002 adev->gfx.mec_fw = NULL;
1003 release_firmware(adev->gfx.mec2_fw);
1004 adev->gfx.mec2_fw = NULL;
1005 release_firmware(adev->gfx.rlc_fw);
1006 adev->gfx.rlc_fw = NULL;
1007}
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1021{
1022 const u32 num_tile_mode_states =
1023 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1024 const u32 num_secondary_tile_mode_states =
1025 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1026 u32 reg_offset, split_equal_to_row_size;
1027 uint32_t *tile, *macrotile;
1028
1029 tile = adev->gfx.config.tile_mode_array;
1030 macrotile = adev->gfx.config.macrotile_mode_array;
1031
1032 switch (adev->gfx.config.mem_row_size_in_kb) {
1033 case 1:
1034 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1035 break;
1036 case 2:
1037 default:
1038 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1039 break;
1040 case 4:
1041 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1042 break;
1043 }
1044
1045 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1046 tile[reg_offset] = 0;
1047 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1048 macrotile[reg_offset] = 0;
1049
1050 switch (adev->asic_type) {
1051 case CHIP_BONAIRE:
1052 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1053 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1054 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1055 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1056 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1057 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1058 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1059 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1060 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1061 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1063 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1064 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1065 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1066 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1067 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1068 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1069 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1070 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1071 TILE_SPLIT(split_equal_to_row_size));
1072 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1073 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1074 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1075 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1076 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1077 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1078 TILE_SPLIT(split_equal_to_row_size));
1079 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1080 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1081 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1082 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1083 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1084 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1085 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1087 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1089 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1090 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1091 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1093 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1094 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1095 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1096 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1097 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1098 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1101 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1102 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1103 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1105 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1106 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1107 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1109 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1110 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1111 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1112 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1114 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1115 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1116 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1117 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1118 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1119 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1121 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1122 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1123 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1125 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1126 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1127 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1129 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1130 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1131 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1132 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1134 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1135 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1136 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1138 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1139 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1140 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1142 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1143 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1144 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1145 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1146 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1149 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1150 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1151 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1153 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1154
1155 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1158 NUM_BANKS(ADDR_SURF_16_BANK));
1159 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1162 NUM_BANKS(ADDR_SURF_16_BANK));
1163 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1166 NUM_BANKS(ADDR_SURF_16_BANK));
1167 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1170 NUM_BANKS(ADDR_SURF_16_BANK));
1171 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1174 NUM_BANKS(ADDR_SURF_16_BANK));
1175 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1178 NUM_BANKS(ADDR_SURF_8_BANK));
1179 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1182 NUM_BANKS(ADDR_SURF_4_BANK));
1183 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1186 NUM_BANKS(ADDR_SURF_16_BANK));
1187 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1190 NUM_BANKS(ADDR_SURF_16_BANK));
1191 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1194 NUM_BANKS(ADDR_SURF_16_BANK));
1195 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1196 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1197 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1198 NUM_BANKS(ADDR_SURF_16_BANK));
1199 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1202 NUM_BANKS(ADDR_SURF_16_BANK));
1203 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1206 NUM_BANKS(ADDR_SURF_8_BANK));
1207 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1208 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1209 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1210 NUM_BANKS(ADDR_SURF_4_BANK));
1211
1212 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1213 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1214 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1215 if (reg_offset != 7)
1216 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1217 break;
1218 case CHIP_HAWAII:
1219 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1220 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1223 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1224 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1227 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1229 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1230 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1231 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1232 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1233 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1234 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1235 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1236 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1238 TILE_SPLIT(split_equal_to_row_size));
1239 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1240 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1241 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1242 TILE_SPLIT(split_equal_to_row_size));
1243 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1244 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1245 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1246 TILE_SPLIT(split_equal_to_row_size));
1247 tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1248 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1249 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1250 TILE_SPLIT(split_equal_to_row_size));
1251 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1252 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1253 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1254 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1255 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1256 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1257 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1258 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1260 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1261 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1262 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1264 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1265 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1266 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1268 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1269 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1270 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1271 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1272 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1275 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1276 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1279 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1280 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1281 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1283 tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1284 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1285 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1287 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1288 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1289 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1291 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1292 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1293 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1294 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1298 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1299 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1302 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1303 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1306 tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1307 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1310 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1311 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1314 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1315 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1318 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1322 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1323 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1325 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1327 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1329 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1330 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1331 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1333 tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1334 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1335 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1337
1338 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1341 NUM_BANKS(ADDR_SURF_16_BANK));
1342 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1345 NUM_BANKS(ADDR_SURF_16_BANK));
1346 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1349 NUM_BANKS(ADDR_SURF_16_BANK));
1350 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1353 NUM_BANKS(ADDR_SURF_16_BANK));
1354 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1357 NUM_BANKS(ADDR_SURF_8_BANK));
1358 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1359 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1360 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1361 NUM_BANKS(ADDR_SURF_4_BANK));
1362 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1365 NUM_BANKS(ADDR_SURF_4_BANK));
1366 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1367 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1368 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1369 NUM_BANKS(ADDR_SURF_16_BANK));
1370 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1373 NUM_BANKS(ADDR_SURF_16_BANK));
1374 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1377 NUM_BANKS(ADDR_SURF_16_BANK));
1378 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1381 NUM_BANKS(ADDR_SURF_8_BANK));
1382 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1385 NUM_BANKS(ADDR_SURF_16_BANK));
1386 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1389 NUM_BANKS(ADDR_SURF_8_BANK));
1390 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1393 NUM_BANKS(ADDR_SURF_4_BANK));
1394
1395 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1396 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1397 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1398 if (reg_offset != 7)
1399 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1400 break;
1401 case CHIP_KABINI:
1402 case CHIP_KAVERI:
1403 case CHIP_MULLINS:
1404 default:
1405 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1406 PIPE_CONFIG(ADDR_SURF_P2) |
1407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1409 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1410 PIPE_CONFIG(ADDR_SURF_P2) |
1411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1413 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1414 PIPE_CONFIG(ADDR_SURF_P2) |
1415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1417 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1418 PIPE_CONFIG(ADDR_SURF_P2) |
1419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1421 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1422 PIPE_CONFIG(ADDR_SURF_P2) |
1423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1424 TILE_SPLIT(split_equal_to_row_size));
1425 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1426 PIPE_CONFIG(ADDR_SURF_P2) |
1427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1428 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1429 PIPE_CONFIG(ADDR_SURF_P2) |
1430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1431 TILE_SPLIT(split_equal_to_row_size));
1432 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1433 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1434 PIPE_CONFIG(ADDR_SURF_P2));
1435 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1436 PIPE_CONFIG(ADDR_SURF_P2) |
1437 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1438 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1439 PIPE_CONFIG(ADDR_SURF_P2) |
1440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1442 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1443 PIPE_CONFIG(ADDR_SURF_P2) |
1444 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1446 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1447 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1448 PIPE_CONFIG(ADDR_SURF_P2) |
1449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1450 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1451 PIPE_CONFIG(ADDR_SURF_P2) |
1452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1454 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1455 PIPE_CONFIG(ADDR_SURF_P2) |
1456 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1458 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1459 PIPE_CONFIG(ADDR_SURF_P2) |
1460 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1462 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1463 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1464 PIPE_CONFIG(ADDR_SURF_P2) |
1465 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1467 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1468 PIPE_CONFIG(ADDR_SURF_P2) |
1469 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1470 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1471 PIPE_CONFIG(ADDR_SURF_P2) |
1472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1474 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1475 PIPE_CONFIG(ADDR_SURF_P2) |
1476 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1478 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1479 PIPE_CONFIG(ADDR_SURF_P2) |
1480 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1482 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1483 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1484 PIPE_CONFIG(ADDR_SURF_P2) |
1485 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1487 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1488 PIPE_CONFIG(ADDR_SURF_P2) |
1489 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1491 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1492 PIPE_CONFIG(ADDR_SURF_P2) |
1493 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1495 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1496 PIPE_CONFIG(ADDR_SURF_P2) |
1497 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1498 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1499 PIPE_CONFIG(ADDR_SURF_P2) |
1500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1502 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1503 PIPE_CONFIG(ADDR_SURF_P2) |
1504 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1506 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1507
1508 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1511 NUM_BANKS(ADDR_SURF_8_BANK));
1512 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1515 NUM_BANKS(ADDR_SURF_8_BANK));
1516 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1519 NUM_BANKS(ADDR_SURF_8_BANK));
1520 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1523 NUM_BANKS(ADDR_SURF_8_BANK));
1524 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1527 NUM_BANKS(ADDR_SURF_8_BANK));
1528 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1531 NUM_BANKS(ADDR_SURF_8_BANK));
1532 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1535 NUM_BANKS(ADDR_SURF_8_BANK));
1536 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1539 NUM_BANKS(ADDR_SURF_16_BANK));
1540 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1543 NUM_BANKS(ADDR_SURF_16_BANK));
1544 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1547 NUM_BANKS(ADDR_SURF_16_BANK));
1548 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1551 NUM_BANKS(ADDR_SURF_16_BANK));
1552 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1555 NUM_BANKS(ADDR_SURF_16_BANK));
1556 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1559 NUM_BANKS(ADDR_SURF_16_BANK));
1560 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1561 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1562 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1563 NUM_BANKS(ADDR_SURF_8_BANK));
1564
1565 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1566 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1567 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1568 if (reg_offset != 7)
1569 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1570 break;
1571 }
1572}
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
1586 u32 se_num, u32 sh_num, u32 instance)
1587{
1588 u32 data;
1589
1590 if (instance == 0xffffffff)
1591 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1592 else
1593 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1594
1595 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1596 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1597 GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1598 else if (se_num == 0xffffffff)
1599 data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1600 (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1601 else if (sh_num == 0xffffffff)
1602 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1603 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1604 else
1605 data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1606 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1607 WREG32(mmGRBM_GFX_INDEX, data);
1608}
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1619{
1620 u32 data, mask;
1621
1622 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1623 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1624
1625 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1626 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1627
1628 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1629 adev->gfx.config.max_sh_per_se);
1630
1631 return (~data) & mask;
1632}
1633
1634static void
1635gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
1636{
1637 switch (adev->asic_type) {
1638 case CHIP_BONAIRE:
1639 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
1640 SE_XSEL(1) | SE_YSEL(1);
1641 *rconf1 |= 0x0;
1642 break;
1643 case CHIP_HAWAII:
1644 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
1645 RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
1646 PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
1647 SE_YSEL(3);
1648 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
1649 SE_PAIR_YSEL(2);
1650 break;
1651 case CHIP_KAVERI:
1652 *rconf |= RB_MAP_PKR0(2);
1653 *rconf1 |= 0x0;
1654 break;
1655 case CHIP_KABINI:
1656 case CHIP_MULLINS:
1657 *rconf |= 0x0;
1658 *rconf1 |= 0x0;
1659 break;
1660 default:
1661 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
1662 break;
1663 }
1664}
1665
1666static void
1667gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1668 u32 raster_config, u32 raster_config_1,
1669 unsigned rb_mask, unsigned num_rb)
1670{
1671 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
1672 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
1673 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
1674 unsigned rb_per_se = num_rb / num_se;
1675 unsigned se_mask[4];
1676 unsigned se;
1677
1678 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
1679 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
1680 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
1681 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
1682
1683 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
1684 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
1685 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
1686
1687 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
1688 (!se_mask[2] && !se_mask[3]))) {
1689 raster_config_1 &= ~SE_PAIR_MAP_MASK;
1690
1691 if (!se_mask[0] && !se_mask[1]) {
1692 raster_config_1 |=
1693 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
1694 } else {
1695 raster_config_1 |=
1696 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
1697 }
1698 }
1699
1700 for (se = 0; se < num_se; se++) {
1701 unsigned raster_config_se = raster_config;
1702 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
1703 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
1704 int idx = (se / 2) * 2;
1705
1706 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
1707 raster_config_se &= ~SE_MAP_MASK;
1708
1709 if (!se_mask[idx]) {
1710 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
1711 } else {
1712 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
1713 }
1714 }
1715
1716 pkr0_mask &= rb_mask;
1717 pkr1_mask &= rb_mask;
1718 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
1719 raster_config_se &= ~PKR_MAP_MASK;
1720
1721 if (!pkr0_mask) {
1722 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
1723 } else {
1724 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
1725 }
1726 }
1727
1728 if (rb_per_se >= 2) {
1729 unsigned rb0_mask = 1 << (se * rb_per_se);
1730 unsigned rb1_mask = rb0_mask << 1;
1731
1732 rb0_mask &= rb_mask;
1733 rb1_mask &= rb_mask;
1734 if (!rb0_mask || !rb1_mask) {
1735 raster_config_se &= ~RB_MAP_PKR0_MASK;
1736
1737 if (!rb0_mask) {
1738 raster_config_se |=
1739 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
1740 } else {
1741 raster_config_se |=
1742 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
1743 }
1744 }
1745
1746 if (rb_per_se > 2) {
1747 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
1748 rb1_mask = rb0_mask << 1;
1749 rb0_mask &= rb_mask;
1750 rb1_mask &= rb_mask;
1751 if (!rb0_mask || !rb1_mask) {
1752 raster_config_se &= ~RB_MAP_PKR1_MASK;
1753
1754 if (!rb0_mask) {
1755 raster_config_se |=
1756 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
1757 } else {
1758 raster_config_se |=
1759 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
1760 }
1761 }
1762 }
1763 }
1764
1765
1766 gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
1767 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
1768 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1769 }
1770
1771
1772 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1773}
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
1785{
1786 int i, j;
1787 u32 data;
1788 u32 raster_config = 0, raster_config_1 = 0;
1789 u32 active_rbs = 0;
1790 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1791 adev->gfx.config.max_sh_per_se;
1792 unsigned num_rb_pipes;
1793
1794 mutex_lock(&adev->grbm_idx_mutex);
1795 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1796 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1797 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1798 data = gfx_v7_0_get_rb_active_bitmap(adev);
1799 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1800 rb_bitmap_width_per_sh);
1801 }
1802 }
1803 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1804
1805 adev->gfx.config.backend_enable_mask = active_rbs;
1806 adev->gfx.config.num_rbs = hweight32(active_rbs);
1807
1808 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
1809 adev->gfx.config.max_shader_engines, 16);
1810
1811 gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
1812
1813 if (!adev->gfx.config.backend_enable_mask ||
1814 adev->gfx.config.num_rbs >= num_rb_pipes) {
1815 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
1816 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1817 } else {
1818 gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
1819 adev->gfx.config.backend_enable_mask,
1820 num_rb_pipes);
1821 }
1822 mutex_unlock(&adev->grbm_idx_mutex);
1823}
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833#define DEFAULT_SH_MEM_BASES (0x6000)
1834#define FIRST_COMPUTE_VMID (8)
1835#define LAST_COMPUTE_VMID (16)
1836static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
1837{
1838 int i;
1839 uint32_t sh_mem_config;
1840 uint32_t sh_mem_bases;
1841
1842
1843
1844
1845
1846
1847
1848 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1849 sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1850 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1851 sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
1852 mutex_lock(&adev->srbm_mutex);
1853 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1854 cik_srbm_select(adev, 0, 0, 0, i);
1855
1856 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
1857 WREG32(mmSH_MEM_APE1_BASE, 1);
1858 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1859 WREG32(mmSH_MEM_BASES, sh_mem_bases);
1860 }
1861 cik_srbm_select(adev, 0, 0, 0, 0);
1862 mutex_unlock(&adev->srbm_mutex);
1863}
1864
1865static void gfx_v7_0_config_init(struct amdgpu_device *adev)
1866{
1867 adev->gfx.config.double_offchip_lds_buf = 1;
1868}
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
1879{
1880 u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
1881 u32 tmp;
1882 int i;
1883
1884 WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
1885
1886 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
1887 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
1888 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
1889
1890 gfx_v7_0_tiling_mode_table_init(adev);
1891
1892 gfx_v7_0_setup_rb(adev);
1893 gfx_v7_0_get_cu_info(adev);
1894 gfx_v7_0_config_init(adev);
1895
1896
1897 WREG32(mmCP_MEQ_THRESHOLDS,
1898 (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
1899 (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
1900
1901 mutex_lock(&adev->grbm_idx_mutex);
1902
1903
1904
1905
1906 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1907
1908
1909
1910 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1911 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1912 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
1913 MTYPE_NC);
1914 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
1915 MTYPE_UC);
1916 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);
1917
1918 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
1919 SWIZZLE_ENABLE, 1);
1920 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1921 ELEMENT_SIZE, 1);
1922 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1923 INDEX_STRIDE, 3);
1924 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
1925
1926 mutex_lock(&adev->srbm_mutex);
1927 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
1928 if (i == 0)
1929 sh_mem_base = 0;
1930 else
1931 sh_mem_base = adev->mc.shared_aperture_start >> 48;
1932 cik_srbm_select(adev, 0, 0, 0, i);
1933
1934 WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
1935 WREG32(mmSH_MEM_APE1_BASE, 1);
1936 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1937 WREG32(mmSH_MEM_BASES, sh_mem_base);
1938 }
1939 cik_srbm_select(adev, 0, 0, 0, 0);
1940 mutex_unlock(&adev->srbm_mutex);
1941
1942 gfx_v7_0_init_compute_vmid(adev);
1943
1944 WREG32(mmSX_DEBUG_1, 0x20);
1945
1946 WREG32(mmTA_CNTL_AUX, 0x00010000);
1947
1948 tmp = RREG32(mmSPI_CONFIG_CNTL);
1949 tmp |= 0x03000000;
1950 WREG32(mmSPI_CONFIG_CNTL, tmp);
1951
1952 WREG32(mmSQ_CONFIG, 1);
1953
1954 WREG32(mmDB_DEBUG, 0);
1955
1956 tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
1957 tmp |= 0x00000400;
1958 WREG32(mmDB_DEBUG2, tmp);
1959
1960 tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
1961 tmp |= 0x00020200;
1962 WREG32(mmDB_DEBUG3, tmp);
1963
1964 tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
1965 tmp |= 0x00018208;
1966 WREG32(mmCB_HW_CONTROL, tmp);
1967
1968 WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
1969
1970 WREG32(mmPA_SC_FIFO_SIZE,
1971 ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1972 (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1973 (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1974 (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
1975
1976 WREG32(mmVGT_NUM_INSTANCES, 1);
1977
1978 WREG32(mmCP_PERFMON_CNTL, 0);
1979
1980 WREG32(mmSQ_CONFIG, 0);
1981
1982 WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
1983 ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
1984 (255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
1985
1986 WREG32(mmVGT_CACHE_INVALIDATION,
1987 (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
1988 (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
1989
1990 WREG32(mmVGT_GS_VERTEX_REUSE, 16);
1991 WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
1992
1993 WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
1994 (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
1995 WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
1996
1997 tmp = RREG32(mmSPI_ARB_PRIORITY);
1998 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
1999 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
2000 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
2001 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
2002 WREG32(mmSPI_ARB_PRIORITY, tmp);
2003
2004 mutex_unlock(&adev->grbm_idx_mutex);
2005
2006 udelay(50);
2007}
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
2023{
2024 adev->gfx.scratch.num_reg = 8;
2025 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
2026 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
2027}
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2041{
2042 struct amdgpu_device *adev = ring->adev;
2043 uint32_t scratch;
2044 uint32_t tmp = 0;
2045 unsigned i;
2046 int r;
2047
2048 r = amdgpu_gfx_scratch_get(adev, &scratch);
2049 if (r) {
2050 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
2051 return r;
2052 }
2053 WREG32(scratch, 0xCAFEDEAD);
2054 r = amdgpu_ring_alloc(ring, 3);
2055 if (r) {
2056 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
2057 amdgpu_gfx_scratch_free(adev, scratch);
2058 return r;
2059 }
2060 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2061 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2062 amdgpu_ring_write(ring, 0xDEADBEEF);
2063 amdgpu_ring_commit(ring);
2064
2065 for (i = 0; i < adev->usec_timeout; i++) {
2066 tmp = RREG32(scratch);
2067 if (tmp == 0xDEADBEEF)
2068 break;
2069 DRM_UDELAY(1);
2070 }
2071 if (i < adev->usec_timeout) {
2072 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2073 } else {
2074 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2075 ring->idx, scratch, tmp);
2076 r = -EINVAL;
2077 }
2078 amdgpu_gfx_scratch_free(adev, scratch);
2079 return r;
2080}
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
2091{
2092 u32 ref_and_mask;
2093 int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
2094
2095 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
2096 switch (ring->me) {
2097 case 1:
2098 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
2099 break;
2100 case 2:
2101 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
2102 break;
2103 default:
2104 return;
2105 }
2106 } else {
2107 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
2108 }
2109
2110 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
2111 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) |
2112 WAIT_REG_MEM_FUNCTION(3) |
2113 WAIT_REG_MEM_ENGINE(usepfp)));
2114 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
2115 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
2116 amdgpu_ring_write(ring, ref_and_mask);
2117 amdgpu_ring_write(ring, ref_and_mask);
2118 amdgpu_ring_write(ring, 0x20);
2119}
2120
2121static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
2122{
2123 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2124 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
2125 EVENT_INDEX(4));
2126
2127 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2128 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
2129 EVENT_INDEX(0));
2130}
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
2142{
2143 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2144 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2145 WRITE_DATA_DST_SEL(0) |
2146 WR_CONFIRM));
2147 amdgpu_ring_write(ring, mmHDP_DEBUG0);
2148 amdgpu_ring_write(ring, 0);
2149 amdgpu_ring_write(ring, 1);
2150}
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
2162 u64 seq, unsigned flags)
2163{
2164 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2165 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2166
2167
2168
2169 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2170 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2171 EOP_TC_ACTION_EN |
2172 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2173 EVENT_INDEX(5)));
2174 amdgpu_ring_write(ring, addr & 0xfffffffc);
2175 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2176 DATA_SEL(1) | INT_SEL(0));
2177 amdgpu_ring_write(ring, lower_32_bits(seq - 1));
2178 amdgpu_ring_write(ring, upper_32_bits(seq - 1));
2179
2180
2181 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2182 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2183 EOP_TC_ACTION_EN |
2184 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2185 EVENT_INDEX(5)));
2186 amdgpu_ring_write(ring, addr & 0xfffffffc);
2187 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2188 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2189 amdgpu_ring_write(ring, lower_32_bits(seq));
2190 amdgpu_ring_write(ring, upper_32_bits(seq));
2191}
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2203 u64 addr, u64 seq,
2204 unsigned flags)
2205{
2206 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2207 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2208
2209
2210 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2211 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2212 EOP_TC_ACTION_EN |
2213 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2214 EVENT_INDEX(5)));
2215 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2216 amdgpu_ring_write(ring, addr & 0xfffffffc);
2217 amdgpu_ring_write(ring, upper_32_bits(addr));
2218 amdgpu_ring_write(ring, lower_32_bits(seq));
2219 amdgpu_ring_write(ring, upper_32_bits(seq));
2220}
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2238 struct amdgpu_ib *ib,
2239 unsigned vm_id, bool ctx_switch)
2240{
2241 u32 header, control = 0;
2242
2243
2244 if (ctx_switch) {
2245 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2246 amdgpu_ring_write(ring, 0);
2247 }
2248
2249 if (ib->flags & AMDGPU_IB_FLAG_CE)
2250 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2251 else
2252 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2253
2254 control |= ib->length_dw | (vm_id << 24);
2255
2256 amdgpu_ring_write(ring, header);
2257 amdgpu_ring_write(ring,
2258#ifdef __BIG_ENDIAN
2259 (2 << 0) |
2260#endif
2261 (ib->gpu_addr & 0xFFFFFFFC));
2262 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2263 amdgpu_ring_write(ring, control);
2264}
2265
2266static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2267 struct amdgpu_ib *ib,
2268 unsigned vm_id, bool ctx_switch)
2269{
2270 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
2271
2272 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
2273 amdgpu_ring_write(ring,
2274#ifdef __BIG_ENDIAN
2275 (2 << 0) |
2276#endif
2277 (ib->gpu_addr & 0xFFFFFFFC));
2278 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2279 amdgpu_ring_write(ring, control);
2280}
2281
2282static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
2283{
2284 uint32_t dw2 = 0;
2285
2286 dw2 |= 0x80000000;
2287 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2288 gfx_v7_0_ring_emit_vgt_flush(ring);
2289
2290 dw2 |= 0x8001;
2291
2292 dw2 |= 0x01000000;
2293
2294 dw2 |= 0x10002;
2295 }
2296
2297 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2298 amdgpu_ring_write(ring, dw2);
2299 amdgpu_ring_write(ring, 0);
2300}
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
2312{
2313 struct amdgpu_device *adev = ring->adev;
2314 struct amdgpu_ib ib;
2315 struct dma_fence *f = NULL;
2316 uint32_t scratch;
2317 uint32_t tmp = 0;
2318 long r;
2319
2320 r = amdgpu_gfx_scratch_get(adev, &scratch);
2321 if (r) {
2322 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
2323 return r;
2324 }
2325 WREG32(scratch, 0xCAFEDEAD);
2326 memset(&ib, 0, sizeof(ib));
2327 r = amdgpu_ib_get(adev, NULL, 256, &ib);
2328 if (r) {
2329 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
2330 goto err1;
2331 }
2332 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2333 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
2334 ib.ptr[2] = 0xDEADBEEF;
2335 ib.length_dw = 3;
2336
2337 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
2338 if (r)
2339 goto err2;
2340
2341 r = dma_fence_wait_timeout(f, false, timeout);
2342 if (r == 0) {
2343 DRM_ERROR("amdgpu: IB test timed out\n");
2344 r = -ETIMEDOUT;
2345 goto err2;
2346 } else if (r < 0) {
2347 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
2348 goto err2;
2349 }
2350 tmp = RREG32(scratch);
2351 if (tmp == 0xDEADBEEF) {
2352 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
2353 r = 0;
2354 } else {
2355 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
2356 scratch, tmp);
2357 r = -EINVAL;
2358 }
2359
2360err2:
2361 amdgpu_ib_free(adev, &ib, NULL);
2362 dma_fence_put(f);
2363err1:
2364 amdgpu_gfx_scratch_free(adev, scratch);
2365 return r;
2366}
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2400{
2401 int i;
2402
2403 if (enable) {
2404 WREG32(mmCP_ME_CNTL, 0);
2405 } else {
2406 WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
2407 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2408 adev->gfx.gfx_ring[i].ready = false;
2409 }
2410 udelay(50);
2411}
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2422{
2423 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2424 const struct gfx_firmware_header_v1_0 *ce_hdr;
2425 const struct gfx_firmware_header_v1_0 *me_hdr;
2426 const __le32 *fw_data;
2427 unsigned i, fw_size;
2428
2429 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2430 return -EINVAL;
2431
2432 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
2433 ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
2434 me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
2435
2436 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2437 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2438 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2439 adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
2440 adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
2441 adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
2442 adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
2443 adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
2444 adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
2445
2446 gfx_v7_0_cp_gfx_enable(adev, false);
2447
2448
2449 fw_data = (const __le32 *)
2450 (adev->gfx.pfp_fw->data +
2451 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2452 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2453 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2454 for (i = 0; i < fw_size; i++)
2455 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2456 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2457
2458
2459 fw_data = (const __le32 *)
2460 (adev->gfx.ce_fw->data +
2461 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2462 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2463 WREG32(mmCP_CE_UCODE_ADDR, 0);
2464 for (i = 0; i < fw_size; i++)
2465 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2466 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2467
2468
2469 fw_data = (const __le32 *)
2470 (adev->gfx.me_fw->data +
2471 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2472 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2473 WREG32(mmCP_ME_RAM_WADDR, 0);
2474 for (i = 0; i < fw_size; i++)
2475 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2476 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2477
2478 return 0;
2479}
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
2491{
2492 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2493 const struct cs_section_def *sect = NULL;
2494 const struct cs_extent_def *ext = NULL;
2495 int r, i;
2496
2497
2498 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2499 WREG32(mmCP_ENDIAN_SWAP, 0);
2500 WREG32(mmCP_DEVICE_ID, 1);
2501
2502 gfx_v7_0_cp_gfx_enable(adev, true);
2503
2504 r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
2505 if (r) {
2506 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2507 return r;
2508 }
2509
2510
2511 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2512 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2513 amdgpu_ring_write(ring, 0x8000);
2514 amdgpu_ring_write(ring, 0x8000);
2515
2516
2517 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2518 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2519
2520 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2521 amdgpu_ring_write(ring, 0x80000000);
2522 amdgpu_ring_write(ring, 0x80000000);
2523
2524 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
2525 for (ext = sect->section; ext->extent != NULL; ++ext) {
2526 if (sect->id == SECT_CONTEXT) {
2527 amdgpu_ring_write(ring,
2528 PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
2529 amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2530 for (i = 0; i < ext->reg_count; i++)
2531 amdgpu_ring_write(ring, ext->extent[i]);
2532 }
2533 }
2534 }
2535
2536 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2537 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2538 switch (adev->asic_type) {
2539 case CHIP_BONAIRE:
2540 amdgpu_ring_write(ring, 0x16000012);
2541 amdgpu_ring_write(ring, 0x00000000);
2542 break;
2543 case CHIP_KAVERI:
2544 amdgpu_ring_write(ring, 0x00000000);
2545 amdgpu_ring_write(ring, 0x00000000);
2546 break;
2547 case CHIP_KABINI:
2548 case CHIP_MULLINS:
2549 amdgpu_ring_write(ring, 0x00000000);
2550 amdgpu_ring_write(ring, 0x00000000);
2551 break;
2552 case CHIP_HAWAII:
2553 amdgpu_ring_write(ring, 0x3a00161a);
2554 amdgpu_ring_write(ring, 0x0000002e);
2555 break;
2556 default:
2557 amdgpu_ring_write(ring, 0x00000000);
2558 amdgpu_ring_write(ring, 0x00000000);
2559 break;
2560 }
2561
2562 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2563 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2564
2565 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2566 amdgpu_ring_write(ring, 0);
2567
2568 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2569 amdgpu_ring_write(ring, 0x00000316);
2570 amdgpu_ring_write(ring, 0x0000000e);
2571 amdgpu_ring_write(ring, 0x00000010);
2572
2573 amdgpu_ring_commit(ring);
2574
2575 return 0;
2576}
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2588{
2589 struct amdgpu_ring *ring;
2590 u32 tmp;
2591 u32 rb_bufsz;
2592 u64 rb_addr, rptr_addr;
2593 int r;
2594
2595 WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
2596 if (adev->asic_type != CHIP_HAWAII)
2597 WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2598
2599
2600 WREG32(mmCP_RB_WPTR_DELAY, 0);
2601
2602
2603 WREG32(mmCP_RB_VMID, 0);
2604
2605 WREG32(mmSCRATCH_ADDR, 0);
2606
2607
2608
2609 ring = &adev->gfx.gfx_ring[0];
2610 rb_bufsz = order_base_2(ring->ring_size / 8);
2611 tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2612#ifdef __BIG_ENDIAN
2613 tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
2614#endif
2615 WREG32(mmCP_RB0_CNTL, tmp);
2616
2617
2618 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2619 ring->wptr = 0;
2620 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2621
2622
2623 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2624 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2625 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2626
2627
2628 WREG32(mmSCRATCH_UMSK, 0);
2629
2630 mdelay(1);
2631 WREG32(mmCP_RB0_CNTL, tmp);
2632
2633 rb_addr = ring->gpu_addr >> 8;
2634 WREG32(mmCP_RB0_BASE, rb_addr);
2635 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2636
2637
2638 gfx_v7_0_cp_gfx_start(adev);
2639 ring->ready = true;
2640 r = amdgpu_ring_test_ring(ring);
2641 if (r) {
2642 ring->ready = false;
2643 return r;
2644 }
2645
2646 return 0;
2647}
2648
2649static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
2650{
2651 return ring->adev->wb.wb[ring->rptr_offs];
2652}
2653
2654static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
2655{
2656 struct amdgpu_device *adev = ring->adev;
2657
2658 return RREG32(mmCP_RB0_WPTR);
2659}
2660
2661static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
2662{
2663 struct amdgpu_device *adev = ring->adev;
2664
2665 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2666 (void)RREG32(mmCP_RB0_WPTR);
2667}
2668
2669static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
2670{
2671
2672 return ring->adev->wb.wb[ring->wptr_offs];
2673}
2674
2675static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
2676{
2677 struct amdgpu_device *adev = ring->adev;
2678
2679
2680 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
2681 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2682}
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2693{
2694 int i;
2695
2696 if (enable) {
2697 WREG32(mmCP_MEC_CNTL, 0);
2698 } else {
2699 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2700 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2701 adev->gfx.compute_ring[i].ready = false;
2702 }
2703 udelay(50);
2704}
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2715{
2716 const struct gfx_firmware_header_v1_0 *mec_hdr;
2717 const __le32 *fw_data;
2718 unsigned i, fw_size;
2719
2720 if (!adev->gfx.mec_fw)
2721 return -EINVAL;
2722
2723 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2724 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2725 adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
2726 adev->gfx.mec_feature_version = le32_to_cpu(
2727 mec_hdr->ucode_feature_version);
2728
2729 gfx_v7_0_cp_compute_enable(adev, false);
2730
2731
2732 fw_data = (const __le32 *)
2733 (adev->gfx.mec_fw->data +
2734 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2735 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
2736 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2737 for (i = 0; i < fw_size; i++)
2738 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
2739 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2740
2741 if (adev->asic_type == CHIP_KAVERI) {
2742 const struct gfx_firmware_header_v1_0 *mec2_hdr;
2743
2744 if (!adev->gfx.mec2_fw)
2745 return -EINVAL;
2746
2747 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
2748 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
2749 adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
2750 adev->gfx.mec2_feature_version = le32_to_cpu(
2751 mec2_hdr->ucode_feature_version);
2752
2753
2754 fw_data = (const __le32 *)
2755 (adev->gfx.mec2_fw->data +
2756 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
2757 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
2758 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2759 for (i = 0; i < fw_size; i++)
2760 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
2761 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2762 }
2763
2764 return 0;
2765}
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
2776{
2777 int i;
2778
2779 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2780 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2781
2782 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
2783 }
2784}
2785
2786static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
2787{
2788 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
2789}
2790
2791static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
2792{
2793 int r;
2794 u32 *hpd;
2795 size_t mec_hpd_size;
2796
2797 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2798
2799
2800 amdgpu_gfx_compute_queue_acquire(adev);
2801
2802
2803 mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
2804 * GFX7_MEC_HPD_SIZE * 2;
2805
2806 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2807 AMDGPU_GEM_DOMAIN_GTT,
2808 &adev->gfx.mec.hpd_eop_obj,
2809 &adev->gfx.mec.hpd_eop_gpu_addr,
2810 (void **)&hpd);
2811 if (r) {
2812 dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
2813 gfx_v7_0_mec_fini(adev);
2814 return r;
2815 }
2816
2817
2818 memset(hpd, 0, mec_hpd_size);
2819
2820 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2821 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2822
2823 return 0;
2824}
2825
2826struct hqd_registers
2827{
2828 u32 cp_mqd_base_addr;
2829 u32 cp_mqd_base_addr_hi;
2830 u32 cp_hqd_active;
2831 u32 cp_hqd_vmid;
2832 u32 cp_hqd_persistent_state;
2833 u32 cp_hqd_pipe_priority;
2834 u32 cp_hqd_queue_priority;
2835 u32 cp_hqd_quantum;
2836 u32 cp_hqd_pq_base;
2837 u32 cp_hqd_pq_base_hi;
2838 u32 cp_hqd_pq_rptr;
2839 u32 cp_hqd_pq_rptr_report_addr;
2840 u32 cp_hqd_pq_rptr_report_addr_hi;
2841 u32 cp_hqd_pq_wptr_poll_addr;
2842 u32 cp_hqd_pq_wptr_poll_addr_hi;
2843 u32 cp_hqd_pq_doorbell_control;
2844 u32 cp_hqd_pq_wptr;
2845 u32 cp_hqd_pq_control;
2846 u32 cp_hqd_ib_base_addr;
2847 u32 cp_hqd_ib_base_addr_hi;
2848 u32 cp_hqd_ib_rptr;
2849 u32 cp_hqd_ib_control;
2850 u32 cp_hqd_iq_timer;
2851 u32 cp_hqd_iq_rptr;
2852 u32 cp_hqd_dequeue_request;
2853 u32 cp_hqd_dma_offload;
2854 u32 cp_hqd_sema_cmd;
2855 u32 cp_hqd_msg_type;
2856 u32 cp_hqd_atomic0_preop_lo;
2857 u32 cp_hqd_atomic0_preop_hi;
2858 u32 cp_hqd_atomic1_preop_lo;
2859 u32 cp_hqd_atomic1_preop_hi;
2860 u32 cp_hqd_hq_scheduler0;
2861 u32 cp_hqd_hq_scheduler1;
2862 u32 cp_mqd_control;
2863};
2864
2865static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
2866 int mec, int pipe)
2867{
2868 u64 eop_gpu_addr;
2869 u32 tmp;
2870 size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
2871 * GFX7_MEC_HPD_SIZE * 2;
2872
2873 mutex_lock(&adev->srbm_mutex);
2874 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
2875
2876 cik_srbm_select(adev, mec + 1, pipe, 0, 0);
2877
2878
2879 WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2880 WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2881
2882
2883 WREG32(mmCP_HPD_EOP_VMID, 0);
2884
2885
2886 tmp = RREG32(mmCP_HPD_EOP_CONTROL);
2887 tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
2888 tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
2889 WREG32(mmCP_HPD_EOP_CONTROL, tmp);
2890
2891 cik_srbm_select(adev, 0, 0, 0, 0);
2892 mutex_unlock(&adev->srbm_mutex);
2893}
2894
2895static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
2896{
2897 int i;
2898
2899
2900 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
2901 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
2902 for (i = 0; i < adev->usec_timeout; i++) {
2903 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
2904 break;
2905 udelay(1);
2906 }
2907
2908 if (i == adev->usec_timeout)
2909 return -ETIMEDOUT;
2910
2911 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
2912 WREG32(mmCP_HQD_PQ_RPTR, 0);
2913 WREG32(mmCP_HQD_PQ_WPTR, 0);
2914 }
2915
2916 return 0;
2917}
2918
2919static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
2920 struct cik_mqd *mqd,
2921 uint64_t mqd_gpu_addr,
2922 struct amdgpu_ring *ring)
2923{
2924 u64 hqd_gpu_addr;
2925 u64 wb_gpu_addr;
2926
2927
2928 memset(mqd, 0, sizeof(struct cik_mqd));
2929
2930 mqd->header = 0xC0310800;
2931 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2932 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2933 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2934 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2935
2936
2937 mqd->cp_hqd_pq_doorbell_control =
2938 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
2939 if (ring->use_doorbell)
2940 mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
2941 else
2942 mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
2943
2944
2945 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
2946 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
2947
2948
2949 mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
2950 mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
2951
2952
2953 hqd_gpu_addr = ring->gpu_addr >> 8;
2954 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2955 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2956
2957
2958 mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
2959 mqd->cp_hqd_pq_control &=
2960 ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
2961 CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
2962
2963 mqd->cp_hqd_pq_control |=
2964 order_base_2(ring->ring_size / 8);
2965 mqd->cp_hqd_pq_control |=
2966 (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
2967#ifdef __BIG_ENDIAN
2968 mqd->cp_hqd_pq_control |=
2969 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
2970#endif
2971 mqd->cp_hqd_pq_control &=
2972 ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
2973 CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
2974 CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
2975 mqd->cp_hqd_pq_control |=
2976 CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
2977 CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK;
2978
2979
2980 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2981 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2982 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2983
2984
2985 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2986 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2987 mqd->cp_hqd_pq_rptr_report_addr_hi =
2988 upper_32_bits(wb_gpu_addr) & 0xffff;
2989
2990
2991 if (ring->use_doorbell) {
2992 mqd->cp_hqd_pq_doorbell_control =
2993 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
2994 mqd->cp_hqd_pq_doorbell_control &=
2995 ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
2996 mqd->cp_hqd_pq_doorbell_control |=
2997 (ring->doorbell_index <<
2998 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
2999 mqd->cp_hqd_pq_doorbell_control |=
3000 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
3001 mqd->cp_hqd_pq_doorbell_control &=
3002 ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
3003 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
3004
3005 } else {
3006 mqd->cp_hqd_pq_doorbell_control = 0;
3007 }
3008
3009
3010 ring->wptr = 0;
3011 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
3012 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3013
3014
3015 mqd->cp_hqd_vmid = 0;
3016
3017
3018 mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
3019 mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
3020 mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
3021 mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
3022 mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
3023 mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
3024 mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
3025 mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
3026 mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
3027 mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
3028 mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
3029 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3030 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3031 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
3032 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
3033 mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
3034
3035
3036 mqd->cp_hqd_active = 1;
3037}
3038
3039int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
3040{
3041 uint32_t tmp;
3042 uint32_t mqd_reg;
3043 uint32_t *mqd_data;
3044
3045
3046 mqd_data = &mqd->cp_mqd_base_addr_lo;
3047
3048
3049 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3050 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3051 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3052
3053
3054 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
3055 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
3056
3057
3058 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
3059 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
3060
3061 return 0;
3062}
3063
3064static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
3065{
3066 int r;
3067 u64 mqd_gpu_addr;
3068 struct cik_mqd *mqd;
3069 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
3070
3071 r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
3072 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
3073 &mqd_gpu_addr, (void **)&mqd);
3074 if (r) {
3075 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3076 return r;
3077 }
3078
3079 mutex_lock(&adev->srbm_mutex);
3080 cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3081
3082 gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
3083 gfx_v7_0_mqd_deactivate(adev);
3084 gfx_v7_0_mqd_commit(adev, mqd);
3085
3086 cik_srbm_select(adev, 0, 0, 0, 0);
3087 mutex_unlock(&adev->srbm_mutex);
3088
3089 amdgpu_bo_kunmap(ring->mqd_obj);
3090 amdgpu_bo_unreserve(ring->mqd_obj);
3091 return 0;
3092}
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
3104{
3105 int r, i, j;
3106 u32 tmp;
3107 struct amdgpu_ring *ring;
3108
3109
3110 tmp = RREG32(mmCP_CPF_DEBUG);
3111 tmp |= (1 << 23);
3112 WREG32(mmCP_CPF_DEBUG, tmp);
3113
3114
3115 for (i = 0; i < adev->gfx.mec.num_mec; i++)
3116 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
3117 gfx_v7_0_compute_pipe_init(adev, i, j);
3118
3119
3120 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3121 r = gfx_v7_0_compute_queue_init(adev, i);
3122 if (r) {
3123 gfx_v7_0_cp_compute_fini(adev);
3124 return r;
3125 }
3126 }
3127
3128 gfx_v7_0_cp_compute_enable(adev, true);
3129
3130 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3131 ring = &adev->gfx.compute_ring[i];
3132 ring->ready = true;
3133 r = amdgpu_ring_test_ring(ring);
3134 if (r)
3135 ring->ready = false;
3136 }
3137
3138 return 0;
3139}
3140
/*
 * gfx_v7_0_cp_enable - enable or disable both CP front ends
 *
 * @adev: amdgpu device pointer
 * @enable: passed through unchanged to both helpers
 *
 * Calls gfx_v7_0_cp_gfx_enable() and then gfx_v7_0_cp_compute_enable().
 */
static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v7_0_cp_gfx_enable(adev, enable);
	gfx_v7_0_cp_compute_enable(adev, enable);
}
3146
/*
 * gfx_v7_0_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @adev: amdgpu device pointer
 *
 * Runs gfx_v7_0_cp_gfx_load_microcode() and, only if that succeeded,
 * gfx_v7_0_cp_compute_load_microcode().
 *
 * Returns 0 on success or the first non-zero error encountered.
 */
static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
{
	int r = gfx_v7_0_cp_gfx_load_microcode(adev);

	if (!r)
		r = gfx_v7_0_cp_compute_load_microcode(adev);

	return r ? r : 0;
}
3160
3161static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3162 bool enable)
3163{
3164 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3165
3166 if (enable)
3167 tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3168 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3169 else
3170 tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3171 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3172 WREG32(mmCP_INT_CNTL_RING0, tmp);
3173}
3174
3175static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
3176{
3177 int r;
3178
3179 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3180
3181 r = gfx_v7_0_cp_load_microcode(adev);
3182 if (r)
3183 return r;
3184
3185 r = gfx_v7_0_cp_gfx_resume(adev);
3186 if (r)
3187 return r;
3188 r = gfx_v7_0_cp_compute_resume(adev);
3189 if (r)
3190 return r;
3191
3192 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3193
3194 return 0;
3195}
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3206{
3207 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3208 uint32_t seq = ring->fence_drv.sync_seq;
3209 uint64_t addr = ring->fence_drv.gpu_addr;
3210
3211 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3212 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) |
3213 WAIT_REG_MEM_FUNCTION(3) |
3214 WAIT_REG_MEM_ENGINE(usepfp)));
3215 amdgpu_ring_write(ring, addr & 0xfffffffc);
3216 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3217 amdgpu_ring_write(ring, seq);
3218 amdgpu_ring_write(ring, 0xffffffff);
3219 amdgpu_ring_write(ring, 4);
3220
3221 if (usepfp) {
3222
3223 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3224 amdgpu_ring_write(ring, 0);
3225 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3226 amdgpu_ring_write(ring, 0);
3227 }
3228}
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3245 unsigned vm_id, uint64_t pd_addr)
3246{
3247 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3248
3249 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3250 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3251 WRITE_DATA_DST_SEL(0)));
3252 if (vm_id < 8) {
3253 amdgpu_ring_write(ring,
3254 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
3255 } else {
3256 amdgpu_ring_write(ring,
3257 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
3258 }
3259 amdgpu_ring_write(ring, 0);
3260 amdgpu_ring_write(ring, pd_addr >> 12);
3261
3262
3263 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3264 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3265 WRITE_DATA_DST_SEL(0)));
3266 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3267 amdgpu_ring_write(ring, 0);
3268 amdgpu_ring_write(ring, 1 << vm_id);
3269
3270
3271 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3272 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |
3273 WAIT_REG_MEM_FUNCTION(0) |
3274 WAIT_REG_MEM_ENGINE(0)));
3275 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3276 amdgpu_ring_write(ring, 0);
3277 amdgpu_ring_write(ring, 0);
3278 amdgpu_ring_write(ring, 0);
3279 amdgpu_ring_write(ring, 0x20);
3280
3281
3282 if (usepfp) {
3283
3284 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3285 amdgpu_ring_write(ring, 0x0);
3286
3287
3288 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3289 amdgpu_ring_write(ring, 0);
3290 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3291 amdgpu_ring_write(ring, 0);
3292 }
3293}
3294
3295
3296
3297
3298
3299
/*
 * gfx_v7_0_rlc_fini - release the RLC buffer objects
 *
 * @adev: amdgpu device pointer
 *
 * Passes the save/restore, clear state and CP table buffer objects to
 * amdgpu_bo_free_kernel(); NULL is passed for both remaining arguments of
 * each call.
 */
static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
3306
3307static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
3308{
3309 const u32 *src_ptr;
3310 volatile u32 *dst_ptr;
3311 u32 dws, i;
3312 const struct cs_section_def *cs_data;
3313 int r;
3314
3315
3316 if (adev->flags & AMD_IS_APU) {
3317 if (adev->asic_type == CHIP_KAVERI) {
3318 adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
3319 adev->gfx.rlc.reg_list_size =
3320 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
3321 } else {
3322 adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
3323 adev->gfx.rlc.reg_list_size =
3324 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
3325 }
3326 }
3327 adev->gfx.rlc.cs_data = ci_cs_data;
3328 adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048);
3329 adev->gfx.rlc.cp_table_size += 64 * 1024;
3330
3331 src_ptr = adev->gfx.rlc.reg_list;
3332 dws = adev->gfx.rlc.reg_list_size;
3333 dws += (5 * 16) + 48 + 48 + 64;
3334
3335 cs_data = adev->gfx.rlc.cs_data;
3336
3337 if (src_ptr) {
3338
3339 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
3340 AMDGPU_GEM_DOMAIN_VRAM,
3341 &adev->gfx.rlc.save_restore_obj,
3342 &adev->gfx.rlc.save_restore_gpu_addr,
3343 (void **)&adev->gfx.rlc.sr_ptr);
3344 if (r) {
3345 dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r);
3346 gfx_v7_0_rlc_fini(adev);
3347 return r;
3348 }
3349
3350
3351 dst_ptr = adev->gfx.rlc.sr_ptr;
3352 for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
3353 dst_ptr[i] = cpu_to_le32(src_ptr[i]);
3354 amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
3355 amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
3356 }
3357
3358 if (cs_data) {
3359
3360 adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
3361
3362 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
3363 AMDGPU_GEM_DOMAIN_VRAM,
3364 &adev->gfx.rlc.clear_state_obj,
3365 &adev->gfx.rlc.clear_state_gpu_addr,
3366 (void **)&adev->gfx.rlc.cs_ptr);
3367 if (r) {
3368 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
3369 gfx_v7_0_rlc_fini(adev);
3370 return r;
3371 }
3372
3373
3374 dst_ptr = adev->gfx.rlc.cs_ptr;
3375 gfx_v7_0_get_csb_buffer(adev, dst_ptr);
3376 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
3377 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
3378 }
3379
3380 if (adev->gfx.rlc.cp_table_size) {
3381
3382 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
3383 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
3384 &adev->gfx.rlc.cp_table_obj,
3385 &adev->gfx.rlc.cp_table_gpu_addr,
3386 (void **)&adev->gfx.rlc.cp_table_ptr);
3387 if (r) {
3388 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
3389 gfx_v7_0_rlc_fini(adev);
3390 return r;
3391 }
3392
3393 gfx_v7_0_init_cp_pg_table(adev);
3394
3395 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
3396 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
3397
3398 }
3399
3400 return 0;
3401}
3402
3403static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3404{
3405 u32 tmp;
3406
3407 tmp = RREG32(mmRLC_LB_CNTL);
3408 if (enable)
3409 tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3410 else
3411 tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3412 WREG32(mmRLC_LB_CNTL, tmp);
3413}
3414
3415static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3416{
3417 u32 i, j, k;
3418 u32 mask;
3419
3420 mutex_lock(&adev->grbm_idx_mutex);
3421 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3422 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3423 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
3424 for (k = 0; k < adev->usec_timeout; k++) {
3425 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3426 break;
3427 udelay(1);
3428 }
3429 }
3430 }
3431 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3432 mutex_unlock(&adev->grbm_idx_mutex);
3433
3434 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3435 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3436 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3437 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3438 for (k = 0; k < adev->usec_timeout; k++) {
3439 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3440 break;
3441 udelay(1);
3442 }
3443}
3444
3445static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3446{
3447 u32 tmp;
3448
3449 tmp = RREG32(mmRLC_CNTL);
3450 if (tmp != rlc)
3451 WREG32(mmRLC_CNTL, rlc);
3452}
3453
3454static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
3455{
3456 u32 data, orig;
3457
3458 orig = data = RREG32(mmRLC_CNTL);
3459
3460 if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
3461 u32 i;
3462
3463 data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
3464 WREG32(mmRLC_CNTL, data);
3465
3466 for (i = 0; i < adev->usec_timeout; i++) {
3467 if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
3468 break;
3469 udelay(1);
3470 }
3471
3472 gfx_v7_0_wait_for_rlc_serdes(adev);
3473 }
3474
3475 return orig;
3476}
3477
3478static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
3479{
3480 u32 tmp, i, mask;
3481
3482 tmp = 0x1 | (1 << 1);
3483 WREG32(mmRLC_GPR_REG2, tmp);
3484
3485 mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
3486 RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
3487 for (i = 0; i < adev->usec_timeout; i++) {
3488 if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
3489 break;
3490 udelay(1);
3491 }
3492
3493 for (i = 0; i < adev->usec_timeout; i++) {
3494 if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
3495 break;
3496 udelay(1);
3497 }
3498}
3499
3500static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
3501{
3502 u32 tmp;
3503
3504 tmp = 0x1 | (0 << 1);
3505 WREG32(mmRLC_GPR_REG2, tmp);
3506}
3507
3508
3509
3510
3511
3512
3513
3514
/*
 * gfx_v7_0_rlc_stop - stop the RLC
 *
 * @adev: amdgpu device pointer
 *
 * Writes 0 to RLC_CNTL, disables the gui idle interrupt and then waits on
 * the RLC serdes via gfx_v7_0_wait_for_rlc_serdes().
 */
static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32(mmRLC_CNTL, 0);

	gfx_v7_0_enable_gui_idle_interrupt(adev, false);

	gfx_v7_0_wait_for_rlc_serdes(adev);
}
3523
3524
3525
3526
3527
3528
3529
3530
/*
 * gfx_v7_0_rlc_start - start the RLC
 *
 * @adev: amdgpu device pointer
 *
 * Writes RLC_CNTL with only RLC_ENABLE_F32 set, enables the gui idle
 * interrupt and delays 50us.
 */
static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);

	gfx_v7_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
3539
3540static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
3541{
3542 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3543
3544 tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3545 WREG32(mmGRBM_SOFT_RESET, tmp);
3546 udelay(50);
3547 tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3548 WREG32(mmGRBM_SOFT_RESET, tmp);
3549 udelay(50);
3550}
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
3562{
3563 const struct rlc_firmware_header_v1_0 *hdr;
3564 const __le32 *fw_data;
3565 unsigned i, fw_size;
3566 u32 tmp;
3567
3568 if (!adev->gfx.rlc_fw)
3569 return -EINVAL;
3570
3571 hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
3572 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3573 adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
3574 adev->gfx.rlc_feature_version = le32_to_cpu(
3575 hdr->ucode_feature_version);
3576
3577 gfx_v7_0_rlc_stop(adev);
3578
3579
3580 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3581 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
3582
3583 gfx_v7_0_rlc_reset(adev);
3584
3585 gfx_v7_0_init_pg(adev);
3586
3587 WREG32(mmRLC_LB_CNTR_INIT, 0);
3588 WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
3589
3590 mutex_lock(&adev->grbm_idx_mutex);
3591 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3592 WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
3593 WREG32(mmRLC_LB_PARAMS, 0x00600408);
3594 WREG32(mmRLC_LB_CNTL, 0x80000004);
3595 mutex_unlock(&adev->grbm_idx_mutex);
3596
3597 WREG32(mmRLC_MC_CNTL, 0);
3598 WREG32(mmRLC_UCODE_CNTL, 0);
3599
3600 fw_data = (const __le32 *)
3601 (adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3602 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3603 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3604 for (i = 0; i < fw_size; i++)
3605 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3606 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3607
3608
3609 gfx_v7_0_enable_lbpw(adev, false);
3610
3611 if (adev->asic_type == CHIP_BONAIRE)
3612 WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
3613
3614 gfx_v7_0_rlc_start(adev);
3615
3616 return 0;
3617}
3618
3619static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
3620{
3621 u32 data, orig, tmp, tmp2;
3622
3623 orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
3624
3625 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
3626 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3627
3628 tmp = gfx_v7_0_halt_rlc(adev);
3629
3630 mutex_lock(&adev->grbm_idx_mutex);
3631 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3632 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3633 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3634 tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
3635 RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
3636 RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
3637 WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
3638 mutex_unlock(&adev->grbm_idx_mutex);
3639
3640 gfx_v7_0_update_rlc(adev, tmp);
3641
3642 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3643 if (orig != data)
3644 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
3645
3646 } else {
3647 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3648
3649 RREG32(mmCB_CGTT_SCLK_CTRL);
3650 RREG32(mmCB_CGTT_SCLK_CTRL);
3651 RREG32(mmCB_CGTT_SCLK_CTRL);
3652 RREG32(mmCB_CGTT_SCLK_CTRL);
3653
3654 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
3655 if (orig != data)
3656 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
3657
3658 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3659 }
3660}
3661
3662static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
3663{
3664 u32 data, orig, tmp = 0;
3665
3666 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3667 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3668 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
3669 orig = data = RREG32(mmCP_MEM_SLP_CNTL);
3670 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3671 if (orig != data)
3672 WREG32(mmCP_MEM_SLP_CNTL, data);
3673 }
3674 }
3675
3676 orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
3677 data |= 0x00000001;
3678 data &= 0xfffffffd;
3679 if (orig != data)
3680 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
3681
3682 tmp = gfx_v7_0_halt_rlc(adev);
3683
3684 mutex_lock(&adev->grbm_idx_mutex);
3685 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3686 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3687 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3688 data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
3689 RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
3690 WREG32(mmRLC_SERDES_WR_CTRL, data);
3691 mutex_unlock(&adev->grbm_idx_mutex);
3692
3693 gfx_v7_0_update_rlc(adev, tmp);
3694
3695 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
3696 orig = data = RREG32(mmCGTS_SM_CTRL_REG);
3697 data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
3698 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
3699 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
3700 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
3701 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
3702 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
3703 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
3704 data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
3705 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
3706 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
3707 if (orig != data)
3708 WREG32(mmCGTS_SM_CTRL_REG, data);
3709 }
3710 } else {
3711 orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
3712 data |= 0x00000003;
3713 if (orig != data)
3714 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
3715
3716 data = RREG32(mmRLC_MEM_SLP_CNTL);
3717 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
3718 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3719 WREG32(mmRLC_MEM_SLP_CNTL, data);
3720 }
3721
3722 data = RREG32(mmCP_MEM_SLP_CNTL);
3723 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
3724 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3725 WREG32(mmCP_MEM_SLP_CNTL, data);
3726 }
3727
3728 orig = data = RREG32(mmCGTS_SM_CTRL_REG);
3729 data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
3730 if (orig != data)
3731 WREG32(mmCGTS_SM_CTRL_REG, data);
3732
3733 tmp = gfx_v7_0_halt_rlc(adev);
3734
3735 mutex_lock(&adev->grbm_idx_mutex);
3736 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3737 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
3738 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
3739 data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
3740 WREG32(mmRLC_SERDES_WR_CTRL, data);
3741 mutex_unlock(&adev->grbm_idx_mutex);
3742
3743 gfx_v7_0_update_rlc(adev, tmp);
3744 }
3745}
3746
3747static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
3748 bool enable)
3749{
3750 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3751
3752 if (enable) {
3753 gfx_v7_0_enable_mgcg(adev, true);
3754 gfx_v7_0_enable_cgcg(adev, true);
3755 } else {
3756 gfx_v7_0_enable_cgcg(adev, false);
3757 gfx_v7_0_enable_mgcg(adev, false);
3758 }
3759 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3760}
3761
3762static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
3763 bool enable)
3764{
3765 u32 data, orig;
3766
3767 orig = data = RREG32(mmRLC_PG_CNTL);
3768 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3769 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3770 else
3771 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3772 if (orig != data)
3773 WREG32(mmRLC_PG_CNTL, data);
3774}
3775
3776static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
3777 bool enable)
3778{
3779 u32 data, orig;
3780
3781 orig = data = RREG32(mmRLC_PG_CNTL);
3782 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3783 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3784 else
3785 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3786 if (orig != data)
3787 WREG32(mmRLC_PG_CNTL, data);
3788}
3789
3790static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
3791{
3792 u32 data, orig;
3793
3794 orig = data = RREG32(mmRLC_PG_CNTL);
3795 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
3796 data &= ~0x8000;
3797 else
3798 data |= 0x8000;
3799 if (orig != data)
3800 WREG32(mmRLC_PG_CNTL, data);
3801}
3802
3803static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
3804{
3805 u32 data, orig;
3806
3807 orig = data = RREG32(mmRLC_PG_CNTL);
3808 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
3809 data &= ~0x2000;
3810 else
3811 data |= 0x2000;
3812 if (orig != data)
3813 WREG32(mmRLC_PG_CNTL, data);
3814}
3815
3816static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev)
3817{
3818 const __le32 *fw_data;
3819 volatile u32 *dst_ptr;
3820 int me, i, max_me = 4;
3821 u32 bo_offset = 0;
3822 u32 table_offset, table_size;
3823
3824 if (adev->asic_type == CHIP_KAVERI)
3825 max_me = 5;
3826
3827 if (adev->gfx.rlc.cp_table_ptr == NULL)
3828 return;
3829
3830
3831 dst_ptr = adev->gfx.rlc.cp_table_ptr;
3832 for (me = 0; me < max_me; me++) {
3833 if (me == 0) {
3834 const struct gfx_firmware_header_v1_0 *hdr =
3835 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
3836 fw_data = (const __le32 *)
3837 (adev->gfx.ce_fw->data +
3838 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3839 table_offset = le32_to_cpu(hdr->jt_offset);
3840 table_size = le32_to_cpu(hdr->jt_size);
3841 } else if (me == 1) {
3842 const struct gfx_firmware_header_v1_0 *hdr =
3843 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
3844 fw_data = (const __le32 *)
3845 (adev->gfx.pfp_fw->data +
3846 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3847 table_offset = le32_to_cpu(hdr->jt_offset);
3848 table_size = le32_to_cpu(hdr->jt_size);
3849 } else if (me == 2) {
3850 const struct gfx_firmware_header_v1_0 *hdr =
3851 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
3852 fw_data = (const __le32 *)
3853 (adev->gfx.me_fw->data +
3854 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3855 table_offset = le32_to_cpu(hdr->jt_offset);
3856 table_size = le32_to_cpu(hdr->jt_size);
3857 } else if (me == 3) {
3858 const struct gfx_firmware_header_v1_0 *hdr =
3859 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3860 fw_data = (const __le32 *)
3861 (adev->gfx.mec_fw->data +
3862 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3863 table_offset = le32_to_cpu(hdr->jt_offset);
3864 table_size = le32_to_cpu(hdr->jt_size);
3865 } else {
3866 const struct gfx_firmware_header_v1_0 *hdr =
3867 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3868 fw_data = (const __le32 *)
3869 (adev->gfx.mec2_fw->data +
3870 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3871 table_offset = le32_to_cpu(hdr->jt_offset);
3872 table_size = le32_to_cpu(hdr->jt_size);
3873 }
3874
3875 for (i = 0; i < table_size; i ++) {
3876 dst_ptr[bo_offset + i] =
3877 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
3878 }
3879
3880 bo_offset += table_size;
3881 }
3882}
3883
3884static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
3885 bool enable)
3886{
3887 u32 data, orig;
3888
3889 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
3890 orig = data = RREG32(mmRLC_PG_CNTL);
3891 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3892 if (orig != data)
3893 WREG32(mmRLC_PG_CNTL, data);
3894
3895 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3896 data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3897 if (orig != data)
3898 WREG32(mmRLC_AUTO_PG_CTRL, data);
3899 } else {
3900 orig = data = RREG32(mmRLC_PG_CNTL);
3901 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3902 if (orig != data)
3903 WREG32(mmRLC_PG_CNTL, data);
3904
3905 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3906 data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3907 if (orig != data)
3908 WREG32(mmRLC_AUTO_PG_CTRL, data);
3909
3910 data = RREG32(mmDB_RENDER_CONTROL);
3911 }
3912}
3913
3914static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
3915 u32 bitmap)
3916{
3917 u32 data;
3918
3919 if (!bitmap)
3920 return;
3921
3922 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3923 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3924
3925 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
3926}
3927
3928static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3929{
3930 u32 data, mask;
3931
3932 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
3933 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
3934
3935 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3936 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3937
3938 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
3939
3940 return (~data) & mask;
3941}
3942
3943static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
3944{
3945 u32 tmp;
3946
3947 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3948
3949 tmp = RREG32(mmRLC_MAX_PG_CU);
3950 tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
3951 tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
3952 WREG32(mmRLC_MAX_PG_CU, tmp);
3953}
3954
3955static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
3956 bool enable)
3957{
3958 u32 data, orig;
3959
3960 orig = data = RREG32(mmRLC_PG_CNTL);
3961 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
3962 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3963 else
3964 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3965 if (orig != data)
3966 WREG32(mmRLC_PG_CNTL, data);
3967}
3968
3969static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
3970 bool enable)
3971{
3972 u32 data, orig;
3973
3974 orig = data = RREG32(mmRLC_PG_CNTL);
3975 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
3976 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3977 else
3978 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3979 if (orig != data)
3980 WREG32(mmRLC_PG_CNTL, data);
3981}
3982
3983#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
3984#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
3985
3986static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
3987{
3988 u32 data, orig;
3989 u32 i;
3990
3991 if (adev->gfx.rlc.cs_data) {
3992 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
3993 WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
3994 WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
3995 WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
3996 } else {
3997 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
3998 for (i = 0; i < 3; i++)
3999 WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
4000 }
4001 if (adev->gfx.rlc.reg_list) {
4002 WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
4003 for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
4004 WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
4005 }
4006
4007 orig = data = RREG32(mmRLC_PG_CNTL);
4008 data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
4009 if (orig != data)
4010 WREG32(mmRLC_PG_CNTL, data);
4011
4012 WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
4013 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4014
4015 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
4016 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
4017 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4018 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
4019
4020 data = 0x10101010;
4021 WREG32(mmRLC_PG_DELAY, data);
4022
4023 data = RREG32(mmRLC_PG_DELAY_2);
4024 data &= ~0xff;
4025 data |= 0x3;
4026 WREG32(mmRLC_PG_DELAY_2, data);
4027
4028 data = RREG32(mmRLC_AUTO_PG_CTRL);
4029 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
4030 data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
4031 WREG32(mmRLC_AUTO_PG_CTRL, data);
4032
4033}
4034
/*
 * gfx_v7_0_update_gfx_pg - apply one enable state to all gfx PG features
 *
 * @adev: amdgpu device pointer
 * @enable: passed unchanged to the cgpg, static mgpg and dynamic mgpg
 *          helpers, which each check their own pg_flags bit
 */
static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
{
	gfx_v7_0_enable_gfx_cgpg(adev, enable);
	gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
	gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
}
4041
4042static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
4043{
4044 u32 count = 0;
4045 const struct cs_section_def *sect = NULL;
4046 const struct cs_extent_def *ext = NULL;
4047
4048 if (adev->gfx.rlc.cs_data == NULL)
4049 return 0;
4050
4051
4052 count += 2;
4053
4054 count += 3;
4055
4056 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4057 for (ext = sect->section; ext->extent != NULL; ++ext) {
4058 if (sect->id == SECT_CONTEXT)
4059 count += 2 + ext->reg_count;
4060 else
4061 return 0;
4062 }
4063 }
4064
4065 count += 4;
4066
4067 count += 2;
4068
4069 count += 2;
4070
4071 return count;
4072}
4073
4074static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
4075 volatile u32 *buffer)
4076{
4077 u32 count = 0, i;
4078 const struct cs_section_def *sect = NULL;
4079 const struct cs_extent_def *ext = NULL;
4080
4081 if (adev->gfx.rlc.cs_data == NULL)
4082 return;
4083 if (buffer == NULL)
4084 return;
4085
4086 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4087 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4088
4089 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4090 buffer[count++] = cpu_to_le32(0x80000000);
4091 buffer[count++] = cpu_to_le32(0x80000000);
4092
4093 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4094 for (ext = sect->section; ext->extent != NULL; ++ext) {
4095 if (sect->id == SECT_CONTEXT) {
4096 buffer[count++] =
4097 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
4098 buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4099 for (i = 0; i < ext->reg_count; i++)
4100 buffer[count++] = cpu_to_le32(ext->extent[i]);
4101 } else {
4102 return;
4103 }
4104 }
4105 }
4106
4107 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4108 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4109 switch (adev->asic_type) {
4110 case CHIP_BONAIRE:
4111 buffer[count++] = cpu_to_le32(0x16000012);
4112 buffer[count++] = cpu_to_le32(0x00000000);
4113 break;
4114 case CHIP_KAVERI:
4115 buffer[count++] = cpu_to_le32(0x00000000);
4116 buffer[count++] = cpu_to_le32(0x00000000);
4117 break;
4118 case CHIP_KABINI:
4119 case CHIP_MULLINS:
4120 buffer[count++] = cpu_to_le32(0x00000000);
4121 buffer[count++] = cpu_to_le32(0x00000000);
4122 break;
4123 case CHIP_HAWAII:
4124 buffer[count++] = cpu_to_le32(0x3a00161a);
4125 buffer[count++] = cpu_to_le32(0x0000002e);
4126 break;
4127 default:
4128 buffer[count++] = cpu_to_le32(0x00000000);
4129 buffer[count++] = cpu_to_le32(0x00000000);
4130 break;
4131 }
4132
4133 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4134 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
4135
4136 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
4137 buffer[count++] = cpu_to_le32(0);
4138}
4139
4140static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
4141{
4142 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4143 AMD_PG_SUPPORT_GFX_SMG |
4144 AMD_PG_SUPPORT_GFX_DMG |
4145 AMD_PG_SUPPORT_CP |
4146 AMD_PG_SUPPORT_GDS |
4147 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4148 gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
4149 gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
4150 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4151 gfx_v7_0_init_gfx_cgpg(adev);
4152 gfx_v7_0_enable_cp_pg(adev, true);
4153 gfx_v7_0_enable_gds_pg(adev, true);
4154 }
4155 gfx_v7_0_init_ao_cu_mask(adev);
4156 gfx_v7_0_update_gfx_pg(adev, true);
4157 }
4158}
4159
4160static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
4161{
4162 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4163 AMD_PG_SUPPORT_GFX_SMG |
4164 AMD_PG_SUPPORT_GFX_DMG |
4165 AMD_PG_SUPPORT_CP |
4166 AMD_PG_SUPPORT_GDS |
4167 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4168 gfx_v7_0_update_gfx_pg(adev, false);
4169 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4170 gfx_v7_0_enable_cp_pg(adev, false);
4171 gfx_v7_0_enable_gds_pg(adev, false);
4172 }
4173 }
4174}
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4185{
4186 uint64_t clock;
4187
4188 mutex_lock(&adev->gfx.gpu_clock_mutex);
4189 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4190 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4191 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4192 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4193 return clock;
4194}
4195
4196static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4197 uint32_t vmid,
4198 uint32_t gds_base, uint32_t gds_size,
4199 uint32_t gws_base, uint32_t gws_size,
4200 uint32_t oa_base, uint32_t oa_size)
4201{
4202 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4203 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4204
4205 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4206 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4207
4208 oa_base = oa_base >> AMDGPU_OA_SHIFT;
4209 oa_size = oa_size >> AMDGPU_OA_SHIFT;
4210
4211
4212 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4213 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4214 WRITE_DATA_DST_SEL(0)));
4215 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4216 amdgpu_ring_write(ring, 0);
4217 amdgpu_ring_write(ring, gds_base);
4218
4219
4220 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4221 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4222 WRITE_DATA_DST_SEL(0)));
4223 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4224 amdgpu_ring_write(ring, 0);
4225 amdgpu_ring_write(ring, gds_size);
4226
4227
4228 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4229 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4230 WRITE_DATA_DST_SEL(0)));
4231 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4232 amdgpu_ring_write(ring, 0);
4233 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4234
4235
4236 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4237 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4238 WRITE_DATA_DST_SEL(0)));
4239 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4240 amdgpu_ring_write(ring, 0);
4241 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4242}
4243
4244static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
4245{
4246 WREG32(mmSQ_IND_INDEX,
4247 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4248 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4249 (address << SQ_IND_INDEX__INDEX__SHIFT) |
4250 (SQ_IND_INDEX__FORCE_READ_MASK));
4251 return RREG32(mmSQ_IND_DATA);
4252}
4253
4254static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
4255 uint32_t wave, uint32_t thread,
4256 uint32_t regno, uint32_t num, uint32_t *out)
4257{
4258 WREG32(mmSQ_IND_INDEX,
4259 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4260 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4261 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
4262 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
4263 (SQ_IND_INDEX__FORCE_READ_MASK) |
4264 (SQ_IND_INDEX__AUTO_INCR_MASK));
4265 while (num--)
4266 *(out++) = RREG32(mmSQ_IND_DATA);
4267}
4268
4269static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
4270{
4271
4272 dst[(*no_fields)++] = 0;
4273 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
4274 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
4275 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
4276 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
4277 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
4278 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
4279 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
4280 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
4281 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
4282 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
4283 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
4284 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
4285 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
4286 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
4287 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
4288 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
4289 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
4290 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
4291}
4292
4293static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
4294 uint32_t wave, uint32_t start,
4295 uint32_t size, uint32_t *dst)
4296{
4297 wave_read_regs(
4298 adev, simd, wave, 0,
4299 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
4300}
4301
4302static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
4303 .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
4304 .select_se_sh = &gfx_v7_0_select_se_sh,
4305 .read_wave_data = &gfx_v7_0_read_wave_data,
4306 .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
4307};
4308
4309static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
4310 .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
4311 .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
4312};
4313
4314static int gfx_v7_0_early_init(void *handle)
4315{
4316 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4317
4318 adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
4319 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4320 adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
4321 adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
4322 gfx_v7_0_set_ring_funcs(adev);
4323 gfx_v7_0_set_irq_funcs(adev);
4324 gfx_v7_0_set_gds_init(adev);
4325
4326 return 0;
4327}
4328
4329static int gfx_v7_0_late_init(void *handle)
4330{
4331 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4332 int r;
4333
4334 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4335 if (r)
4336 return r;
4337
4338 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4339 if (r)
4340 return r;
4341
4342 return 0;
4343}
4344
4345static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4346{
4347 u32 gb_addr_config;
4348 u32 mc_shared_chmap, mc_arb_ramcfg;
4349 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
4350 u32 tmp;
4351
4352 switch (adev->asic_type) {
4353 case CHIP_BONAIRE:
4354 adev->gfx.config.max_shader_engines = 2;
4355 adev->gfx.config.max_tile_pipes = 4;
4356 adev->gfx.config.max_cu_per_sh = 7;
4357 adev->gfx.config.max_sh_per_se = 1;
4358 adev->gfx.config.max_backends_per_se = 2;
4359 adev->gfx.config.max_texture_channel_caches = 4;
4360 adev->gfx.config.max_gprs = 256;
4361 adev->gfx.config.max_gs_threads = 32;
4362 adev->gfx.config.max_hw_contexts = 8;
4363
4364 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4365 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4366 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4367 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4368 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4369 break;
4370 case CHIP_HAWAII:
4371 adev->gfx.config.max_shader_engines = 4;
4372 adev->gfx.config.max_tile_pipes = 16;
4373 adev->gfx.config.max_cu_per_sh = 11;
4374 adev->gfx.config.max_sh_per_se = 1;
4375 adev->gfx.config.max_backends_per_se = 4;
4376 adev->gfx.config.max_texture_channel_caches = 16;
4377 adev->gfx.config.max_gprs = 256;
4378 adev->gfx.config.max_gs_threads = 32;
4379 adev->gfx.config.max_hw_contexts = 8;
4380
4381 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4382 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4383 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4384 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4385 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
4386 break;
4387 case CHIP_KAVERI:
4388 adev->gfx.config.max_shader_engines = 1;
4389 adev->gfx.config.max_tile_pipes = 4;
4390 if ((adev->pdev->device == 0x1304) ||
4391 (adev->pdev->device == 0x1305) ||
4392 (adev->pdev->device == 0x130C) ||
4393 (adev->pdev->device == 0x130F) ||
4394 (adev->pdev->device == 0x1310) ||
4395 (adev->pdev->device == 0x1311) ||
4396 (adev->pdev->device == 0x131C)) {
4397 adev->gfx.config.max_cu_per_sh = 8;
4398 adev->gfx.config.max_backends_per_se = 2;
4399 } else if ((adev->pdev->device == 0x1309) ||
4400 (adev->pdev->device == 0x130A) ||
4401 (adev->pdev->device == 0x130D) ||
4402 (adev->pdev->device == 0x1313) ||
4403 (adev->pdev->device == 0x131D)) {
4404 adev->gfx.config.max_cu_per_sh = 6;
4405 adev->gfx.config.max_backends_per_se = 2;
4406 } else if ((adev->pdev->device == 0x1306) ||
4407 (adev->pdev->device == 0x1307) ||
4408 (adev->pdev->device == 0x130B) ||
4409 (adev->pdev->device == 0x130E) ||
4410 (adev->pdev->device == 0x1315) ||
4411 (adev->pdev->device == 0x131B)) {
4412 adev->gfx.config.max_cu_per_sh = 4;
4413 adev->gfx.config.max_backends_per_se = 1;
4414 } else {
4415 adev->gfx.config.max_cu_per_sh = 3;
4416 adev->gfx.config.max_backends_per_se = 1;
4417 }
4418 adev->gfx.config.max_sh_per_se = 1;
4419 adev->gfx.config.max_texture_channel_caches = 4;
4420 adev->gfx.config.max_gprs = 256;
4421 adev->gfx.config.max_gs_threads = 16;
4422 adev->gfx.config.max_hw_contexts = 8;
4423
4424 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4425 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4426 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4427 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4428 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4429 break;
4430 case CHIP_KABINI:
4431 case CHIP_MULLINS:
4432 default:
4433 adev->gfx.config.max_shader_engines = 1;
4434 adev->gfx.config.max_tile_pipes = 2;
4435 adev->gfx.config.max_cu_per_sh = 2;
4436 adev->gfx.config.max_sh_per_se = 1;
4437 adev->gfx.config.max_backends_per_se = 1;
4438 adev->gfx.config.max_texture_channel_caches = 2;
4439 adev->gfx.config.max_gprs = 256;
4440 adev->gfx.config.max_gs_threads = 16;
4441 adev->gfx.config.max_hw_contexts = 8;
4442
4443 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4444 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4445 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4446 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4447 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4448 break;
4449 }
4450
4451 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
4452 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
4453 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
4454
4455 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
4456 adev->gfx.config.mem_max_burst_length_bytes = 256;
4457 if (adev->flags & AMD_IS_APU) {
4458
4459 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
4460 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4461 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4462
4463 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
4464 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4465 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4466
4467
4468 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
4469 dimm00_addr_map = 0;
4470 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
4471 dimm01_addr_map = 0;
4472 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
4473 dimm10_addr_map = 0;
4474 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
4475 dimm11_addr_map = 0;
4476
4477
4478
4479 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
4480 adev->gfx.config.mem_row_size_in_kb = 2;
4481 else
4482 adev->gfx.config.mem_row_size_in_kb = 1;
4483 } else {
4484 tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
4485 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
4486 if (adev->gfx.config.mem_row_size_in_kb > 4)
4487 adev->gfx.config.mem_row_size_in_kb = 4;
4488 }
4489
4490 adev->gfx.config.shader_engine_tile_size = 32;
4491 adev->gfx.config.num_gpus = 1;
4492 adev->gfx.config.multi_gpu_tile_size = 64;
4493
4494
4495 gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
4496 switch (adev->gfx.config.mem_row_size_in_kb) {
4497 case 1:
4498 default:
4499 gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4500 break;
4501 case 2:
4502 gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4503 break;
4504 case 4:
4505 gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4506 break;
4507 }
4508 adev->gfx.config.gb_addr_config = gb_addr_config;
4509}
4510
4511static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
4512 int mec, int pipe, int queue)
4513{
4514 int r;
4515 unsigned irq_type;
4516 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
4517
4518
4519 ring->me = mec + 1;
4520 ring->pipe = pipe;
4521 ring->queue = queue;
4522
4523 ring->ring_obj = NULL;
4524 ring->use_doorbell = true;
4525 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
4526 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
4527
4528 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4529 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
4530 + ring->pipe;
4531
4532
4533 r = amdgpu_ring_init(adev, ring, 1024,
4534 &adev->gfx.eop_irq, irq_type);
4535 if (r)
4536 return r;
4537
4538
4539 return 0;
4540}
4541
4542static int gfx_v7_0_sw_init(void *handle)
4543{
4544 struct amdgpu_ring *ring;
4545 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4546 int i, j, k, r, ring_id;
4547
4548 switch (adev->asic_type) {
4549 case CHIP_KAVERI:
4550 adev->gfx.mec.num_mec = 2;
4551 break;
4552 case CHIP_BONAIRE:
4553 case CHIP_HAWAII:
4554 case CHIP_KABINI:
4555 case CHIP_MULLINS:
4556 default:
4557 adev->gfx.mec.num_mec = 1;
4558 break;
4559 }
4560 adev->gfx.mec.num_pipe_per_mec = 4;
4561 adev->gfx.mec.num_queue_per_pipe = 8;
4562
4563
4564 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
4565 if (r)
4566 return r;
4567
4568
4569 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
4570 &adev->gfx.priv_reg_irq);
4571 if (r)
4572 return r;
4573
4574
4575 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
4576 &adev->gfx.priv_inst_irq);
4577 if (r)
4578 return r;
4579
4580 gfx_v7_0_scratch_init(adev);
4581
4582 r = gfx_v7_0_init_microcode(adev);
4583 if (r) {
4584 DRM_ERROR("Failed to load gfx firmware!\n");
4585 return r;
4586 }
4587
4588 r = gfx_v7_0_rlc_init(adev);
4589 if (r) {
4590 DRM_ERROR("Failed to init rlc BOs!\n");
4591 return r;
4592 }
4593
4594
4595 r = gfx_v7_0_mec_init(adev);
4596 if (r) {
4597 DRM_ERROR("Failed to init MEC BOs!\n");
4598 return r;
4599 }
4600
4601 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4602 ring = &adev->gfx.gfx_ring[i];
4603 ring->ring_obj = NULL;
4604 sprintf(ring->name, "gfx");
4605 r = amdgpu_ring_init(adev, ring, 1024,
4606 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
4607 if (r)
4608 return r;
4609 }
4610
4611
4612 ring_id = 0;
4613 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4614 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4615 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4616 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
4617 continue;
4618
4619 r = gfx_v7_0_compute_ring_init(adev,
4620 ring_id,
4621 i, k, j);
4622 if (r)
4623 return r;
4624
4625 ring_id++;
4626 }
4627 }
4628 }
4629
4630
4631 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
4632 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
4633 &adev->gds.gds_gfx_bo, NULL, NULL);
4634 if (r)
4635 return r;
4636
4637 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
4638 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
4639 &adev->gds.gws_gfx_bo, NULL, NULL);
4640 if (r)
4641 return r;
4642
4643 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
4644 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
4645 &adev->gds.oa_gfx_bo, NULL, NULL);
4646 if (r)
4647 return r;
4648
4649 adev->gfx.ce_ram_size = 0x8000;
4650
4651 gfx_v7_0_gpu_early_init(adev);
4652
4653 return r;
4654}
4655
4656static int gfx_v7_0_sw_fini(void *handle)
4657{
4658 int i;
4659 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4660
4661 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
4662 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
4663 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
4664
4665 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4666 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
4667 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4668 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
4669
4670 gfx_v7_0_cp_compute_fini(adev);
4671 gfx_v7_0_rlc_fini(adev);
4672 gfx_v7_0_mec_fini(adev);
4673 gfx_v7_0_free_microcode(adev);
4674
4675 return 0;
4676}
4677
/*
 * gfx_v7_0_hw_init - IP block hw_init callback
 *
 * @handle: the amdgpu device, passed as an opaque pointer
 *
 * Runs gfx_v7_0_gpu_init(), then resumes the RLC and afterwards the CP.
 *
 * Returns 0 on success, or the error from the RLC or CP resume step.
 */
static int gfx_v7_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret;

	gfx_v7_0_gpu_init(adev);

	/* the CP is only resumed once the RLC came up successfully */
	ret = gfx_v7_0_rlc_resume(adev);
	if (ret)
		return ret;

	return gfx_v7_0_cp_resume(adev);
}
4696
4697static int gfx_v7_0_hw_fini(void *handle)
4698{
4699 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4700
4701 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4702 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4703 gfx_v7_0_cp_enable(adev, false);
4704 gfx_v7_0_rlc_stop(adev);
4705 gfx_v7_0_fini_pg(adev);
4706
4707 return 0;
4708}
4709
/*
 * gfx_v7_0_suspend - IP block suspend callback
 *
 * Suspend is implemented as a plain hw_fini on the same device handle.
 */
static int gfx_v7_0_suspend(void *handle)
{
	return gfx_v7_0_hw_fini(handle);
}
4716
/*
 * gfx_v7_0_resume - IP block resume callback
 *
 * Resume is implemented as a plain hw_init on the same device handle.
 */
static int gfx_v7_0_resume(void *handle)
{
	return gfx_v7_0_hw_init(handle);
}
4723
4724static bool gfx_v7_0_is_idle(void *handle)
4725{
4726 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4727
4728 if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4729 return false;
4730 else
4731 return true;
4732}
4733
4734static int gfx_v7_0_wait_for_idle(void *handle)
4735{
4736 unsigned i;
4737 u32 tmp;
4738 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4739
4740 for (i = 0; i < adev->usec_timeout; i++) {
4741
4742 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4743
4744 if (!tmp)
4745 return 0;
4746 udelay(1);
4747 }
4748 return -ETIMEDOUT;
4749}
4750
4751static int gfx_v7_0_soft_reset(void *handle)
4752{
4753 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4754 u32 tmp;
4755 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4756
4757
4758 tmp = RREG32(mmGRBM_STATUS);
4759 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4760 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4761 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4762 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4763 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4764 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
4765 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
4766 GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
4767
4768 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4769 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
4770 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
4771 }
4772
4773
4774 tmp = RREG32(mmGRBM_STATUS2);
4775 if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
4776 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
4777
4778
4779 tmp = RREG32(mmSRBM_STATUS);
4780 if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
4781 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
4782
4783 if (grbm_soft_reset || srbm_soft_reset) {
4784
4785 gfx_v7_0_fini_pg(adev);
4786 gfx_v7_0_update_cg(adev, false);
4787
4788
4789 gfx_v7_0_rlc_stop(adev);
4790
4791
4792 WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
4793
4794
4795 WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
4796
4797 if (grbm_soft_reset) {
4798 tmp = RREG32(mmGRBM_SOFT_RESET);
4799 tmp |= grbm_soft_reset;
4800 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4801 WREG32(mmGRBM_SOFT_RESET, tmp);
4802 tmp = RREG32(mmGRBM_SOFT_RESET);
4803
4804 udelay(50);
4805
4806 tmp &= ~grbm_soft_reset;
4807 WREG32(mmGRBM_SOFT_RESET, tmp);
4808 tmp = RREG32(mmGRBM_SOFT_RESET);
4809 }
4810
4811 if (srbm_soft_reset) {
4812 tmp = RREG32(mmSRBM_SOFT_RESET);
4813 tmp |= srbm_soft_reset;
4814 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4815 WREG32(mmSRBM_SOFT_RESET, tmp);
4816 tmp = RREG32(mmSRBM_SOFT_RESET);
4817
4818 udelay(50);
4819
4820 tmp &= ~srbm_soft_reset;
4821 WREG32(mmSRBM_SOFT_RESET, tmp);
4822 tmp = RREG32(mmSRBM_SOFT_RESET);
4823 }
4824
4825 udelay(50);
4826 }
4827 return 0;
4828}
4829
4830static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4831 enum amdgpu_interrupt_state state)
4832{
4833 u32 cp_int_cntl;
4834
4835 switch (state) {
4836 case AMDGPU_IRQ_STATE_DISABLE:
4837 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4838 cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4839 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4840 break;
4841 case AMDGPU_IRQ_STATE_ENABLE:
4842 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4843 cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4844 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4845 break;
4846 default:
4847 break;
4848 }
4849}
4850
4851static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4852 int me, int pipe,
4853 enum amdgpu_interrupt_state state)
4854{
4855 u32 mec_int_cntl, mec_int_cntl_reg;
4856
4857
4858
4859
4860
4861
4862
4863 if (me == 1) {
4864 switch (pipe) {
4865 case 0:
4866 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4867 break;
4868 case 1:
4869 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
4870 break;
4871 case 2:
4872 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
4873 break;
4874 case 3:
4875 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
4876 break;
4877 default:
4878 DRM_DEBUG("invalid pipe %d\n", pipe);
4879 return;
4880 }
4881 } else {
4882 DRM_DEBUG("invalid me %d\n", me);
4883 return;
4884 }
4885
4886 switch (state) {
4887 case AMDGPU_IRQ_STATE_DISABLE:
4888 mec_int_cntl = RREG32(mec_int_cntl_reg);
4889 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4890 WREG32(mec_int_cntl_reg, mec_int_cntl);
4891 break;
4892 case AMDGPU_IRQ_STATE_ENABLE:
4893 mec_int_cntl = RREG32(mec_int_cntl_reg);
4894 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4895 WREG32(mec_int_cntl_reg, mec_int_cntl);
4896 break;
4897 default:
4898 break;
4899 }
4900}
4901
4902static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4903 struct amdgpu_irq_src *src,
4904 unsigned type,
4905 enum amdgpu_interrupt_state state)
4906{
4907 u32 cp_int_cntl;
4908
4909 switch (state) {
4910 case AMDGPU_IRQ_STATE_DISABLE:
4911 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4912 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4913 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4914 break;
4915 case AMDGPU_IRQ_STATE_ENABLE:
4916 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4917 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4918 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4919 break;
4920 default:
4921 break;
4922 }
4923
4924 return 0;
4925}
4926
4927static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4928 struct amdgpu_irq_src *src,
4929 unsigned type,
4930 enum amdgpu_interrupt_state state)
4931{
4932 u32 cp_int_cntl;
4933
4934 switch (state) {
4935 case AMDGPU_IRQ_STATE_DISABLE:
4936 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4937 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4938 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4939 break;
4940 case AMDGPU_IRQ_STATE_ENABLE:
4941 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4942 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4943 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4944 break;
4945 default:
4946 break;
4947 }
4948
4949 return 0;
4950}
4951
4952static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4953 struct amdgpu_irq_src *src,
4954 unsigned type,
4955 enum amdgpu_interrupt_state state)
4956{
4957 switch (type) {
4958 case AMDGPU_CP_IRQ_GFX_EOP:
4959 gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
4960 break;
4961 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4962 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4963 break;
4964 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4965 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4966 break;
4967 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4968 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4969 break;
4970 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4971 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4972 break;
4973 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4974 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4975 break;
4976 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4977 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4978 break;
4979 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4980 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4981 break;
4982 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4983 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4984 break;
4985 default:
4986 break;
4987 }
4988 return 0;
4989}
4990
4991static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
4992 struct amdgpu_irq_src *source,
4993 struct amdgpu_iv_entry *entry)
4994{
4995 u8 me_id, pipe_id;
4996 struct amdgpu_ring *ring;
4997 int i;
4998
4999 DRM_DEBUG("IH: CP EOP\n");
5000 me_id = (entry->ring_id & 0x0c) >> 2;
5001 pipe_id = (entry->ring_id & 0x03) >> 0;
5002 switch (me_id) {
5003 case 0:
5004 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5005 break;
5006 case 1:
5007 case 2:
5008 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5009 ring = &adev->gfx.compute_ring[i];
5010 if ((ring->me == me_id) && (ring->pipe == pipe_id))
5011 amdgpu_fence_process(ring);
5012 }
5013 break;
5014 }
5015 return 0;
5016}
5017
5018static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
5019 struct amdgpu_irq_src *source,
5020 struct amdgpu_iv_entry *entry)
5021{
5022 DRM_ERROR("Illegal register access in command stream\n");
5023 schedule_work(&adev->reset_work);
5024 return 0;
5025}
5026
5027static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
5028 struct amdgpu_irq_src *source,
5029 struct amdgpu_iv_entry *entry)
5030{
5031 DRM_ERROR("Illegal instruction in command stream\n");
5032
5033 schedule_work(&adev->reset_work);
5034 return 0;
5035}
5036
5037static int gfx_v7_0_set_clockgating_state(void *handle,
5038 enum amd_clockgating_state state)
5039{
5040 bool gate = false;
5041 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042
5043 if (state == AMD_CG_STATE_GATE)
5044 gate = true;
5045
5046 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
5047
5048 if (gate) {
5049 gfx_v7_0_enable_mgcg(adev, true);
5050 gfx_v7_0_enable_cgcg(adev, true);
5051 } else {
5052 gfx_v7_0_enable_cgcg(adev, false);
5053 gfx_v7_0_enable_mgcg(adev, false);
5054 }
5055 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
5056
5057 return 0;
5058}
5059
5060static int gfx_v7_0_set_powergating_state(void *handle,
5061 enum amd_powergating_state state)
5062{
5063 bool gate = false;
5064 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5065
5066 if (state == AMD_PG_STATE_GATE)
5067 gate = true;
5068
5069 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
5070 AMD_PG_SUPPORT_GFX_SMG |
5071 AMD_PG_SUPPORT_GFX_DMG |
5072 AMD_PG_SUPPORT_CP |
5073 AMD_PG_SUPPORT_GDS |
5074 AMD_PG_SUPPORT_RLC_SMU_HS)) {
5075 gfx_v7_0_update_gfx_pg(adev, gate);
5076 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
5077 gfx_v7_0_enable_cp_pg(adev, gate);
5078 gfx_v7_0_enable_gds_pg(adev, gate);
5079 }
5080 }
5081
5082 return 0;
5083}
5084
5085static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
5086 .name = "gfx_v7_0",
5087 .early_init = gfx_v7_0_early_init,
5088 .late_init = gfx_v7_0_late_init,
5089 .sw_init = gfx_v7_0_sw_init,
5090 .sw_fini = gfx_v7_0_sw_fini,
5091 .hw_init = gfx_v7_0_hw_init,
5092 .hw_fini = gfx_v7_0_hw_fini,
5093 .suspend = gfx_v7_0_suspend,
5094 .resume = gfx_v7_0_resume,
5095 .is_idle = gfx_v7_0_is_idle,
5096 .wait_for_idle = gfx_v7_0_wait_for_idle,
5097 .soft_reset = gfx_v7_0_soft_reset,
5098 .set_clockgating_state = gfx_v7_0_set_clockgating_state,
5099 .set_powergating_state = gfx_v7_0_set_powergating_state,
5100};
5101
5102static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5103 .type = AMDGPU_RING_TYPE_GFX,
5104 .align_mask = 0xff,
5105 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5106 .support_64bit_ptrs = false,
5107 .get_rptr = gfx_v7_0_ring_get_rptr,
5108 .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
5109 .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
5110 .emit_frame_size =
5111 20 +
5112 7 +
5113 5 +
5114 12 + 12 + 12 +
5115 7 + 4 +
5116 17 + 6 +
5117 3 + 4,
5118 .emit_ib_size = 4,
5119 .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
5120 .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
5121 .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
5122 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5123 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5124 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5125 .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
5126 .test_ring = gfx_v7_0_ring_test_ring,
5127 .test_ib = gfx_v7_0_ring_test_ib,
5128 .insert_nop = amdgpu_ring_insert_nop,
5129 .pad_ib = amdgpu_ring_generic_pad_ib,
5130 .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
5131};
5132
5133static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5134 .type = AMDGPU_RING_TYPE_COMPUTE,
5135 .align_mask = 0xff,
5136 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5137 .support_64bit_ptrs = false,
5138 .get_rptr = gfx_v7_0_ring_get_rptr,
5139 .get_wptr = gfx_v7_0_ring_get_wptr_compute,
5140 .set_wptr = gfx_v7_0_ring_set_wptr_compute,
5141 .emit_frame_size =
5142 20 +
5143 7 +
5144 5 +
5145 7 +
5146 17 +
5147 7 + 7 + 7,
5148 .emit_ib_size = 4,
5149 .emit_ib = gfx_v7_0_ring_emit_ib_compute,
5150 .emit_fence = gfx_v7_0_ring_emit_fence_compute,
5151 .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
5152 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5153 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5154 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5155 .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
5156 .test_ring = gfx_v7_0_ring_test_ring,
5157 .test_ib = gfx_v7_0_ring_test_ib,
5158 .insert_nop = amdgpu_ring_insert_nop,
5159 .pad_ib = amdgpu_ring_generic_pad_ib,
5160};
5161
5162static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
5163{
5164 int i;
5165
5166 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5167 adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
5168 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5169 adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
5170}
5171
5172static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
5173 .set = gfx_v7_0_set_eop_interrupt_state,
5174 .process = gfx_v7_0_eop_irq,
5175};
5176
5177static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
5178 .set = gfx_v7_0_set_priv_reg_fault_state,
5179 .process = gfx_v7_0_priv_reg_irq,
5180};
5181
5182static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
5183 .set = gfx_v7_0_set_priv_inst_fault_state,
5184 .process = gfx_v7_0_priv_inst_irq,
5185};
5186
5187static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
5188{
5189 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5190 adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
5191
5192 adev->gfx.priv_reg_irq.num_types = 1;
5193 adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
5194
5195 adev->gfx.priv_inst_irq.num_types = 1;
5196 adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
5197}
5198
5199static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5200{
5201
5202 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5203 adev->gds.gws.total_size = 64;
5204 adev->gds.oa.total_size = 16;
5205
5206 if (adev->gds.mem.total_size == 64 * 1024) {
5207 adev->gds.mem.gfx_partition_size = 4096;
5208 adev->gds.mem.cs_partition_size = 4096;
5209
5210 adev->gds.gws.gfx_partition_size = 4;
5211 adev->gds.gws.cs_partition_size = 4;
5212
5213 adev->gds.oa.gfx_partition_size = 4;
5214 adev->gds.oa.cs_partition_size = 1;
5215 } else {
5216 adev->gds.mem.gfx_partition_size = 1024;
5217 adev->gds.mem.cs_partition_size = 1024;
5218
5219 adev->gds.gws.gfx_partition_size = 16;
5220 adev->gds.gws.cs_partition_size = 16;
5221
5222 adev->gds.oa.gfx_partition_size = 4;
5223 adev->gds.oa.cs_partition_size = 4;
5224 }
5225}
5226
5227
5228static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
5229{
5230 int i, j, k, counter, active_cu_number = 0;
5231 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5232 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
5233 unsigned disable_masks[4 * 2];
5234 u32 ao_cu_num;
5235
5236 if (adev->flags & AMD_IS_APU)
5237 ao_cu_num = 2;
5238 else
5239 ao_cu_num = adev->gfx.config.max_cu_per_sh;
5240
5241 memset(cu_info, 0, sizeof(*cu_info));
5242
5243 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5244
5245 mutex_lock(&adev->grbm_idx_mutex);
5246 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5247 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5248 mask = 1;
5249 ao_bitmap = 0;
5250 counter = 0;
5251 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
5252 if (i < 4 && j < 2)
5253 gfx_v7_0_set_user_cu_inactive_bitmap(
5254 adev, disable_masks[i * 2 + j]);
5255 bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
5256 cu_info->bitmap[i][j] = bitmap;
5257
5258 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5259 if (bitmap & mask) {
5260 if (counter < ao_cu_num)
5261 ao_bitmap |= mask;
5262 counter ++;
5263 }
5264 mask <<= 1;
5265 }
5266 active_cu_number += counter;
5267 if (i < 2 && j < 2)
5268 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5269 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5270 }
5271 }
5272 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5273 mutex_unlock(&adev->grbm_idx_mutex);
5274
5275 cu_info->number = active_cu_number;
5276 cu_info->ao_cu_mask = ao_cu_mask;
5277}
5278
5279const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
5280{
5281 .type = AMD_IP_BLOCK_TYPE_GFX,
5282 .major = 7,
5283 .minor = 0,
5284 .rev = 0,
5285 .funcs = &gfx_v7_0_ip_funcs,
5286};
5287
5288const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
5289{
5290 .type = AMD_IP_BLOCK_TYPE_GFX,
5291 .major = 7,
5292 .minor = 1,
5293 .rev = 0,
5294 .funcs = &gfx_v7_0_ip_funcs,
5295};
5296
5297const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
5298{
5299 .type = AMD_IP_BLOCK_TYPE_GFX,
5300 .major = 7,
5301 .minor = 2,
5302 .rev = 0,
5303 .funcs = &gfx_v7_0_ip_funcs,
5304};
5305
5306const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
5307{
5308 .type = AMD_IP_BLOCK_TYPE_GFX,
5309 .major = 7,
5310 .minor = 3,
5311 .rev = 0,
5312 .funcs = &gfx_v7_0_ip_funcs,
5313};
5314