1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/firmware.h>
25#include <linux/module.h>
26
27#include "amdgpu.h"
28#include "amdgpu_ih.h"
29#include "amdgpu_gfx.h"
30#include "cikd.h"
31#include "cik.h"
32#include "cik_structs.h"
33#include "atom.h"
34#include "amdgpu_ucode.h"
35#include "clearstate_ci.h"
36
37#include "dce/dce_8_0_d.h"
38#include "dce/dce_8_0_sh_mask.h"
39
40#include "bif/bif_4_1_d.h"
41#include "bif/bif_4_1_sh_mask.h"
42
43#include "gca/gfx_7_0_d.h"
44#include "gca/gfx_7_2_enum.h"
45#include "gca/gfx_7_2_sh_mask.h"
46
47#include "gmc/gmc_7_0_d.h"
48#include "gmc/gmc_7_0_sh_mask.h"
49
50#include "oss/oss_2_0_d.h"
51#include "oss/oss_2_0_sh_mask.h"
52
/*
 * NOTE(review): presumably the number of SIMD units per compute unit; the
 * users of this constant are outside the visible part of the file.
 */
#define NUM_SIMD_PER_CU 0x4

/*
 * NOTE(review): by their names, the gfx ring count and the per-MEC HPD
 * buffer size (unit not visible here) -- confirm against their users.
 */
#define GFX7_NUM_GFX_RINGS 1
#define GFX7_MEC_HPD_SIZE 2048

/* Forward declarations; the definitions are not in the visible part of the file. */
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
61
/*
 * Firmware images declared for this module, grouped by chip.
 *
 * The names follow the "amdgpu/<chip>_<block>.bin" pattern that
 * gfx_v7_0_init_microcode() builds at runtime for the pfp, me, ce, rlc and
 * mec blocks.  Kaveri is the only chip with a second MEC image (mec2),
 * matching the CHIP_KAVERI-only request in that function.
 */
MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");

MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");

MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
/* Second MEC image: only requested when asic_type is CHIP_KAVERI. */
MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");

MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
MODULE_FIRMWARE("amdgpu/kabini_me.bin");
MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
MODULE_FIRMWARE("amdgpu/kabini_mec.bin");

MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
MODULE_FIRMWARE("amdgpu/mullins_me.bin");
MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
92
93static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
94{
95 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
96 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
97 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
98 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
99 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
100 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
101 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
102 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
103 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
104 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
105 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
106 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
107 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
108 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
109 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
110 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
111};
112
/*
 * Encode one register word of an RLC save/restore list: a 16-bit field in
 * bits [31:16] and the register byte offset divided by four in the low bits.
 *
 * The shift is done on an unsigned 32-bit value on purpose: fields such as
 * 0x8e00..0xbe00 do not fit a signed int once shifted left by 16, and
 * overflowing a signed left shift is undefined behaviour in C.
 *
 * NOTE(review): the upper field looks like an instance/engine select; its
 * exact meaning is not visible in this file.
 */
#define GFX7_RLC_SR_REG(sel, byte_off) \
	(((uint32_t)(sel) << 16) | ((uint32_t)(byte_off) >> 2))

/*
 * RLC save/restore register list ("spectre" variant).
 *
 * Layout, as written in the table:
 *   - a run of (register word, 0x00000000) pairs,
 *   - a bare 0x3 word,
 *   - a second run of (register word, 0x00000000) pairs,
 *   - a bare 0x5 word,
 *   - five register words with no following zero word.
 *
 * NOTE(review): the zero words are presumably value slots and 0x3 / 0x5
 * section markers interpreted by the RLC; confirm against the consumer.
 */
static const uint32_t spectre_rlc_save_restore_register_list[] = {
	GFX7_RLC_SR_REG(0x0e00, 0xc12c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc140), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc150), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc15c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc168), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc170), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc178), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc204), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2b4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2b8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2bc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2c0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8228), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x829c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x869c), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x98f4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x98f8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9900), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc260), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x90e8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c000), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c00c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c1c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9700), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x8e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x9e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0xae00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0xbe00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x89bc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8900), 0x00000000,
	0x3,
	GFX7_RLC_SR_REG(0x0e00, 0xc130), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc134), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc1fc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc208), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc264), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc268), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc26c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc270), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc274), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc278), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc27c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc280), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc284), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc288), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc28c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc290), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc294), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc298), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc29c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2a0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2a4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2a8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2ac), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2b0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x301d0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30238), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30250), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30254), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30258), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3025c), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x8e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x9e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0xae00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0xbe00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x8e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x9e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0xae00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0xbe00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x8e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x9e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0xae00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0xbe00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x8e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x9e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0xae00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0xbe00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x8e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x9e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0xae00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0xbe00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc99c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9834), 0x00000000,
	GFX7_RLC_SR_REG(0x0000, 0x30f00), 0x00000000,
	GFX7_RLC_SR_REG(0x0001, 0x30f00), 0x00000000,
	GFX7_RLC_SR_REG(0x0000, 0x30f04), 0x00000000,
	GFX7_RLC_SR_REG(0x0001, 0x30f04), 0x00000000,
	GFX7_RLC_SR_REG(0x0000, 0x30f08), 0x00000000,
	GFX7_RLC_SR_REG(0x0001, 0x30f08), 0x00000000,
	GFX7_RLC_SR_REG(0x0000, 0x30f0c), 0x00000000,
	GFX7_RLC_SR_REG(0x0001, 0x30f0c), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x9b7c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8a14), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8a18), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a00), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8bf0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8bcc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8b24), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30a04), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a10), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a14), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a18), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a2c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc700), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc704), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc708), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc768), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc770), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc774), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc778), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc77c), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc780), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc784), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc788), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc78c), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc798), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc79c), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc7a0), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc7a4), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc7a8), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc7ac), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc7b0), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc7b4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9100), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c010), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92a8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92ac), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92b4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92b8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92bc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92c0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92c4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92c8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92cc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x92d0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c00), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c04), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c20), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c38), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c3c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xae00), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9604), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac08), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac0c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac10), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac14), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac58), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac68), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac6c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac70), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac74), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac78), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac7c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac80), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac84), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac88), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac8c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x970c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9714), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9718), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x971c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x8e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x9e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0xae00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0xbe00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xcd10), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xcd14), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88b0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88b4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88b8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88bc), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0x89c0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88c4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88c8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88d0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88d4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88d8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8980), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30938), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3093c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30940), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x89a0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30900), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30904), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x89b4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c210), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c214), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c218), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8904), 0x00000000,
	0x5,
	GFX7_RLC_SR_REG(0x0e00, 0x8c28),
	GFX7_RLC_SR_REG(0x0e00, 0x8c2c),
	GFX7_RLC_SR_REG(0x0e00, 0x8c30),
	GFX7_RLC_SR_REG(0x0e00, 0x8c34),
	GFX7_RLC_SR_REG(0x0e00, 0x9600),
};

#undef GFX7_RLC_SR_REG
559
/*
 * Encode one register word of an RLC save/restore list: a 16-bit field in
 * bits [31:16] and the register byte offset divided by four in the low bits.
 *
 * The shift is done on an unsigned 32-bit value so that a field of 0x8000 or
 * above (spectre_rlc_save_restore_register_list in this file goes up to
 * 0xbe00) cannot overflow a signed int, which would be undefined behaviour.
 *
 * NOTE(review): the upper field looks like an instance/engine select; its
 * exact meaning is not visible in this file.
 */
#define GFX7_RLC_SR_REG(sel, byte_off) \
	(((uint32_t)(sel) << 16) | ((uint32_t)(byte_off) >> 2))

/*
 * RLC save/restore register list ("kalindi" variant).
 *
 * Layout, as written in the table:
 *   - a run of (register word, 0x00000000) pairs,
 *   - a bare 0x3 word,
 *   - a second run of (register word, 0x00000000) pairs,
 *   - a bare 0x5 word,
 *   - five register words with no following zero word.
 *
 * NOTE(review): the zero words are presumably value slots and 0x3 / 0x5
 * section markers interpreted by the RLC; confirm against the consumer.
 */
static const uint32_t kalindi_rlc_save_restore_register_list[] = {
	GFX7_RLC_SR_REG(0x0e00, 0xc12c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc140), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc150), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc15c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc168), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc170), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc204), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2b4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2b8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2bc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2c0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8228), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x829c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x869c), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x98f4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x98f8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9900), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc260), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x90e8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c000), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c00c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c1c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9700), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xcd20), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x89bc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8900), 0x00000000,
	0x3,
	GFX7_RLC_SR_REG(0x0e00, 0xc130), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc134), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc1fc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc208), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc264), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc268), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc26c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc270), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc274), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc28c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc290), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc294), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc298), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2a0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2a4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2a8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc2ac), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x301d0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30238), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30250), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30254), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30258), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3025c), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc900), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc904), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc908), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc90c), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0xc910), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc99c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9834), 0x00000000,
	GFX7_RLC_SR_REG(0x0000, 0x30f00), 0x00000000,
	GFX7_RLC_SR_REG(0x0000, 0x30f04), 0x00000000,
	GFX7_RLC_SR_REG(0x0000, 0x30f08), 0x00000000,
	GFX7_RLC_SR_REG(0x0000, 0x30f0c), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x9b7c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8a14), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8a18), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a00), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8bf0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8bcc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8b24), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30a04), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a10), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a14), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a18), 0x00000000,
	GFX7_RLC_SR_REG(0x0600, 0x30a2c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc700), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc704), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc708), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xc768), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc770), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc774), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc798), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0xc79c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9100), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c010), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c00), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c04), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c20), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c38), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8c3c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xae00), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9604), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac08), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac0c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac10), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac14), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac58), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac68), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac6c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac70), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac74), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac78), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac7c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac80), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac84), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac88), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xac8c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x970c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9714), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x9718), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x971c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x4e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x5e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x6e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x7e00, 0x31068), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xcd10), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0xcd14), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88b0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88b4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88b8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88bc), 0x00000000,
	GFX7_RLC_SR_REG(0x0400, 0x89c0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88c4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88c8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88d0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88d4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x88d8), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8980), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30938), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3093c), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30940), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x89a0), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30900), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x30904), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x89b4), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3e1fc), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c210), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c214), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x3c218), 0x00000000,
	GFX7_RLC_SR_REG(0x0e00, 0x8904), 0x00000000,
	0x5,
	GFX7_RLC_SR_REG(0x0e00, 0x8c28),
	GFX7_RLC_SR_REG(0x0e00, 0x8c2c),
	GFX7_RLC_SR_REG(0x0e00, 0x8c30),
	GFX7_RLC_SR_REG(0x0e00, 0x8c34),
	GFX7_RLC_SR_REG(0x0e00, 0x9600),
};

#undef GFX7_RLC_SR_REG
884
/* Forward declarations; the definitions are not in the visible part of the file. */
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
889
890
891
892
893
894
895
896
897
898
899
900
901
902static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
903{
904 const char *chip_name;
905 char fw_name[30];
906 int err;
907
908 DRM_DEBUG("\n");
909
910 switch (adev->asic_type) {
911 case CHIP_BONAIRE:
912 chip_name = "bonaire";
913 break;
914 case CHIP_HAWAII:
915 chip_name = "hawaii";
916 break;
917 case CHIP_KAVERI:
918 chip_name = "kaveri";
919 break;
920 case CHIP_KABINI:
921 chip_name = "kabini";
922 break;
923 case CHIP_MULLINS:
924 chip_name = "mullins";
925 break;
926 default: BUG();
927 }
928
929 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
930 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
931 if (err)
932 goto out;
933 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
934 if (err)
935 goto out;
936
937 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
938 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
939 if (err)
940 goto out;
941 err = amdgpu_ucode_validate(adev->gfx.me_fw);
942 if (err)
943 goto out;
944
945 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
946 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
947 if (err)
948 goto out;
949 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
950 if (err)
951 goto out;
952
953 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
954 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
955 if (err)
956 goto out;
957 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
958 if (err)
959 goto out;
960
961 if (adev->asic_type == CHIP_KAVERI) {
962 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
963 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
964 if (err)
965 goto out;
966 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
967 if (err)
968 goto out;
969 }
970
971 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
972 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
973 if (err)
974 goto out;
975 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
976
977out:
978 if (err) {
979 pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
980 release_firmware(adev->gfx.pfp_fw);
981 adev->gfx.pfp_fw = NULL;
982 release_firmware(adev->gfx.me_fw);
983 adev->gfx.me_fw = NULL;
984 release_firmware(adev->gfx.ce_fw);
985 adev->gfx.ce_fw = NULL;
986 release_firmware(adev->gfx.mec_fw);
987 adev->gfx.mec_fw = NULL;
988 release_firmware(adev->gfx.mec2_fw);
989 adev->gfx.mec2_fw = NULL;
990 release_firmware(adev->gfx.rlc_fw);
991 adev->gfx.rlc_fw = NULL;
992 }
993 return err;
994}
995
996static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
997{
998 release_firmware(adev->gfx.pfp_fw);
999 adev->gfx.pfp_fw = NULL;
1000 release_firmware(adev->gfx.me_fw);
1001 adev->gfx.me_fw = NULL;
1002 release_firmware(adev->gfx.ce_fw);
1003 adev->gfx.ce_fw = NULL;
1004 release_firmware(adev->gfx.mec_fw);
1005 adev->gfx.mec_fw = NULL;
1006 release_firmware(adev->gfx.mec2_fw);
1007 adev->gfx.mec2_fw = NULL;
1008 release_firmware(adev->gfx.rlc_fw);
1009 adev->gfx.rlc_fw = NULL;
1010}
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1024{
1025 const u32 num_tile_mode_states =
1026 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1027 const u32 num_secondary_tile_mode_states =
1028 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1029 u32 reg_offset, split_equal_to_row_size;
1030 uint32_t *tile, *macrotile;
1031
1032 tile = adev->gfx.config.tile_mode_array;
1033 macrotile = adev->gfx.config.macrotile_mode_array;
1034
1035 switch (adev->gfx.config.mem_row_size_in_kb) {
1036 case 1:
1037 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1038 break;
1039 case 2:
1040 default:
1041 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1042 break;
1043 case 4:
1044 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1045 break;
1046 }
1047
1048 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1049 tile[reg_offset] = 0;
1050 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1051 macrotile[reg_offset] = 0;
1052
1053 switch (adev->asic_type) {
1054 case CHIP_BONAIRE:
1055 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1056 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1057 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1058 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1059 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1060 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1061 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1063 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1064 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1065 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1067 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1068 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1069 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1070 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1071 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1072 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1073 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1074 TILE_SPLIT(split_equal_to_row_size));
1075 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1076 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1077 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1078 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1079 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1080 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1081 TILE_SPLIT(split_equal_to_row_size));
1082 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1083 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1084 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1085 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1087 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1088 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1089 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1090 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1092 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1093 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1094 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1096 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1097 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1098 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1100 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1101 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1104 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1105 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1106 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1108 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1109 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1110 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1112 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1113 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1114 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1115 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1117 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1118 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1119 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1120 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1121 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1122 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1124 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1125 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1126 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1128 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1129 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1130 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1132 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1133 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1134 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1135 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1137 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1138 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1139 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1141 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1142 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1145 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1146 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1148 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1149 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1150 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1152 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1156 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1157
1158 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1161 NUM_BANKS(ADDR_SURF_16_BANK));
1162 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1165 NUM_BANKS(ADDR_SURF_16_BANK));
1166 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1169 NUM_BANKS(ADDR_SURF_16_BANK));
1170 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1173 NUM_BANKS(ADDR_SURF_16_BANK));
1174 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1177 NUM_BANKS(ADDR_SURF_16_BANK));
1178 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1181 NUM_BANKS(ADDR_SURF_8_BANK));
1182 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1185 NUM_BANKS(ADDR_SURF_4_BANK));
1186 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1189 NUM_BANKS(ADDR_SURF_16_BANK));
1190 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1193 NUM_BANKS(ADDR_SURF_16_BANK));
1194 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1197 NUM_BANKS(ADDR_SURF_16_BANK));
1198 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1201 NUM_BANKS(ADDR_SURF_16_BANK));
1202 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1205 NUM_BANKS(ADDR_SURF_16_BANK));
1206 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1209 NUM_BANKS(ADDR_SURF_8_BANK));
1210 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1213 NUM_BANKS(ADDR_SURF_4_BANK));
1214
1215 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1216 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1217 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1218 if (reg_offset != 7)
1219 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1220 break;
1221 case CHIP_HAWAII:
1222 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1223 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1226 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1227 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1230 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1231 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1234 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1235 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1238 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1239 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1241 TILE_SPLIT(split_equal_to_row_size));
1242 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1243 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1245 TILE_SPLIT(split_equal_to_row_size));
1246 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1247 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1249 TILE_SPLIT(split_equal_to_row_size));
1250 tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1253 TILE_SPLIT(split_equal_to_row_size));
1254 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1256 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1257 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1258 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1259 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1260 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1261 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1263 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1264 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1267 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1268 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1269 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1271 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1272 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1274 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1275 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1278 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1282 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1283 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1286 tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1287 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1290 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1294 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1297 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1298 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1299 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1301 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1302 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1303 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1305 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1307 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1309 tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1310 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1311 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1313 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1317 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1319 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1321 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1325 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1327 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1328 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1330 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1332 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1334 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1336 tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1337 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1340
1341 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1344 NUM_BANKS(ADDR_SURF_16_BANK));
1345 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1348 NUM_BANKS(ADDR_SURF_16_BANK));
1349 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1352 NUM_BANKS(ADDR_SURF_16_BANK));
1353 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1356 NUM_BANKS(ADDR_SURF_16_BANK));
1357 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1360 NUM_BANKS(ADDR_SURF_8_BANK));
1361 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1364 NUM_BANKS(ADDR_SURF_4_BANK));
1365 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1368 NUM_BANKS(ADDR_SURF_4_BANK));
1369 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1372 NUM_BANKS(ADDR_SURF_16_BANK));
1373 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1376 NUM_BANKS(ADDR_SURF_16_BANK));
1377 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1380 NUM_BANKS(ADDR_SURF_16_BANK));
1381 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1384 NUM_BANKS(ADDR_SURF_8_BANK));
1385 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1388 NUM_BANKS(ADDR_SURF_16_BANK));
1389 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1392 NUM_BANKS(ADDR_SURF_8_BANK));
1393 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1396 NUM_BANKS(ADDR_SURF_4_BANK));
1397
1398 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1399 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1400 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1401 if (reg_offset != 7)
1402 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1403 break;
1404 case CHIP_KABINI:
1405 case CHIP_KAVERI:
1406 case CHIP_MULLINS:
1407 default:
1408 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1409 PIPE_CONFIG(ADDR_SURF_P2) |
1410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1412 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1413 PIPE_CONFIG(ADDR_SURF_P2) |
1414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1416 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1417 PIPE_CONFIG(ADDR_SURF_P2) |
1418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1420 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1421 PIPE_CONFIG(ADDR_SURF_P2) |
1422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1424 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1425 PIPE_CONFIG(ADDR_SURF_P2) |
1426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1427 TILE_SPLIT(split_equal_to_row_size));
1428 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1429 PIPE_CONFIG(ADDR_SURF_P2) |
1430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1431 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1432 PIPE_CONFIG(ADDR_SURF_P2) |
1433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1434 TILE_SPLIT(split_equal_to_row_size));
1435 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1436 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1437 PIPE_CONFIG(ADDR_SURF_P2));
1438 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1439 PIPE_CONFIG(ADDR_SURF_P2) |
1440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1441 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1442 PIPE_CONFIG(ADDR_SURF_P2) |
1443 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1445 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1446 PIPE_CONFIG(ADDR_SURF_P2) |
1447 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1449 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1450 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1451 PIPE_CONFIG(ADDR_SURF_P2) |
1452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1453 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1454 PIPE_CONFIG(ADDR_SURF_P2) |
1455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1457 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1458 PIPE_CONFIG(ADDR_SURF_P2) |
1459 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1461 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1462 PIPE_CONFIG(ADDR_SURF_P2) |
1463 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1465 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1466 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1467 PIPE_CONFIG(ADDR_SURF_P2) |
1468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1470 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1471 PIPE_CONFIG(ADDR_SURF_P2) |
1472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1473 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1474 PIPE_CONFIG(ADDR_SURF_P2) |
1475 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1477 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1478 PIPE_CONFIG(ADDR_SURF_P2) |
1479 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1481 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1482 PIPE_CONFIG(ADDR_SURF_P2) |
1483 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1485 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1486 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1487 PIPE_CONFIG(ADDR_SURF_P2) |
1488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1490 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1491 PIPE_CONFIG(ADDR_SURF_P2) |
1492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1494 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1495 PIPE_CONFIG(ADDR_SURF_P2) |
1496 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1498 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1499 PIPE_CONFIG(ADDR_SURF_P2) |
1500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1501 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1502 PIPE_CONFIG(ADDR_SURF_P2) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1505 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1506 PIPE_CONFIG(ADDR_SURF_P2) |
1507 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1509 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1510
1511 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1514 NUM_BANKS(ADDR_SURF_8_BANK));
1515 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1518 NUM_BANKS(ADDR_SURF_8_BANK));
1519 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1522 NUM_BANKS(ADDR_SURF_8_BANK));
1523 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1526 NUM_BANKS(ADDR_SURF_8_BANK));
1527 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1530 NUM_BANKS(ADDR_SURF_8_BANK));
1531 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1534 NUM_BANKS(ADDR_SURF_8_BANK));
1535 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1538 NUM_BANKS(ADDR_SURF_8_BANK));
1539 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1542 NUM_BANKS(ADDR_SURF_16_BANK));
1543 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1546 NUM_BANKS(ADDR_SURF_16_BANK));
1547 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1550 NUM_BANKS(ADDR_SURF_16_BANK));
1551 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1554 NUM_BANKS(ADDR_SURF_16_BANK));
1555 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1558 NUM_BANKS(ADDR_SURF_16_BANK));
1559 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1562 NUM_BANKS(ADDR_SURF_16_BANK));
1563 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1566 NUM_BANKS(ADDR_SURF_8_BANK));
1567
1568 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1569 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1570 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1571 if (reg_offset != 7)
1572 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1573 break;
1574 }
1575}
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
1589 u32 se_num, u32 sh_num, u32 instance)
1590{
1591 u32 data;
1592
1593 if (instance == 0xffffffff)
1594 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1595 else
1596 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1597
1598 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1599 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1600 GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1601 else if (se_num == 0xffffffff)
1602 data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1603 (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1604 else if (sh_num == 0xffffffff)
1605 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1606 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1607 else
1608 data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1609 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1610 WREG32(mmGRBM_GFX_INDEX, data);
1611}
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1622{
1623 u32 data, mask;
1624
1625 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1626 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1627
1628 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1629 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1630
1631 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1632 adev->gfx.config.max_sh_per_se);
1633
1634 return (~data) & mask;
1635}
1636
1637static void
1638gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
1639{
1640 switch (adev->asic_type) {
1641 case CHIP_BONAIRE:
1642 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
1643 SE_XSEL(1) | SE_YSEL(1);
1644 *rconf1 |= 0x0;
1645 break;
1646 case CHIP_HAWAII:
1647 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
1648 RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
1649 PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
1650 SE_YSEL(3);
1651 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
1652 SE_PAIR_YSEL(2);
1653 break;
1654 case CHIP_KAVERI:
1655 *rconf |= RB_MAP_PKR0(2);
1656 *rconf1 |= 0x0;
1657 break;
1658 case CHIP_KABINI:
1659 case CHIP_MULLINS:
1660 *rconf |= 0x0;
1661 *rconf1 |= 0x0;
1662 break;
1663 default:
1664 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
1665 break;
1666 }
1667}
1668
1669static void
1670gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1671 u32 raster_config, u32 raster_config_1,
1672 unsigned rb_mask, unsigned num_rb)
1673{
1674 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
1675 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
1676 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
1677 unsigned rb_per_se = num_rb / num_se;
1678 unsigned se_mask[4];
1679 unsigned se;
1680
1681 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
1682 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
1683 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
1684 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
1685
1686 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
1687 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
1688 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
1689
1690 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
1691 (!se_mask[2] && !se_mask[3]))) {
1692 raster_config_1 &= ~SE_PAIR_MAP_MASK;
1693
1694 if (!se_mask[0] && !se_mask[1]) {
1695 raster_config_1 |=
1696 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
1697 } else {
1698 raster_config_1 |=
1699 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
1700 }
1701 }
1702
1703 for (se = 0; se < num_se; se++) {
1704 unsigned raster_config_se = raster_config;
1705 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
1706 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
1707 int idx = (se / 2) * 2;
1708
1709 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
1710 raster_config_se &= ~SE_MAP_MASK;
1711
1712 if (!se_mask[idx]) {
1713 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
1714 } else {
1715 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
1716 }
1717 }
1718
1719 pkr0_mask &= rb_mask;
1720 pkr1_mask &= rb_mask;
1721 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
1722 raster_config_se &= ~PKR_MAP_MASK;
1723
1724 if (!pkr0_mask) {
1725 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
1726 } else {
1727 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
1728 }
1729 }
1730
1731 if (rb_per_se >= 2) {
1732 unsigned rb0_mask = 1 << (se * rb_per_se);
1733 unsigned rb1_mask = rb0_mask << 1;
1734
1735 rb0_mask &= rb_mask;
1736 rb1_mask &= rb_mask;
1737 if (!rb0_mask || !rb1_mask) {
1738 raster_config_se &= ~RB_MAP_PKR0_MASK;
1739
1740 if (!rb0_mask) {
1741 raster_config_se |=
1742 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
1743 } else {
1744 raster_config_se |=
1745 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
1746 }
1747 }
1748
1749 if (rb_per_se > 2) {
1750 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
1751 rb1_mask = rb0_mask << 1;
1752 rb0_mask &= rb_mask;
1753 rb1_mask &= rb_mask;
1754 if (!rb0_mask || !rb1_mask) {
1755 raster_config_se &= ~RB_MAP_PKR1_MASK;
1756
1757 if (!rb0_mask) {
1758 raster_config_se |=
1759 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
1760 } else {
1761 raster_config_se |=
1762 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
1763 }
1764 }
1765 }
1766 }
1767
1768
1769 gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
1770 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
1771 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1772 }
1773
1774
1775 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1776}
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
1788{
1789 int i, j;
1790 u32 data;
1791 u32 raster_config = 0, raster_config_1 = 0;
1792 u32 active_rbs = 0;
1793 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1794 adev->gfx.config.max_sh_per_se;
1795 unsigned num_rb_pipes;
1796
1797 mutex_lock(&adev->grbm_idx_mutex);
1798 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1799 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1800 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1801 data = gfx_v7_0_get_rb_active_bitmap(adev);
1802 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1803 rb_bitmap_width_per_sh);
1804 }
1805 }
1806 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1807
1808 adev->gfx.config.backend_enable_mask = active_rbs;
1809 adev->gfx.config.num_rbs = hweight32(active_rbs);
1810
1811 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
1812 adev->gfx.config.max_shader_engines, 16);
1813
1814 gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
1815
1816 if (!adev->gfx.config.backend_enable_mask ||
1817 adev->gfx.config.num_rbs >= num_rb_pipes) {
1818 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
1819 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1820 } else {
1821 gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
1822 adev->gfx.config.backend_enable_mask,
1823 num_rb_pipes);
1824 }
1825
1826
1827 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1828 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1829 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1830 adev->gfx.config.rb_config[i][j].rb_backend_disable =
1831 RREG32(mmCC_RB_BACKEND_DISABLE);
1832 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
1833 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1834 adev->gfx.config.rb_config[i][j].raster_config =
1835 RREG32(mmPA_SC_RASTER_CONFIG);
1836 adev->gfx.config.rb_config[i][j].raster_config_1 =
1837 RREG32(mmPA_SC_RASTER_CONFIG_1);
1838 }
1839 }
1840 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1841 mutex_unlock(&adev->grbm_idx_mutex);
1842}
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852#define DEFAULT_SH_MEM_BASES (0x6000)
1853#define FIRST_COMPUTE_VMID (8)
1854#define LAST_COMPUTE_VMID (16)
1855static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
1856{
1857 int i;
1858 uint32_t sh_mem_config;
1859 uint32_t sh_mem_bases;
1860
1861
1862
1863
1864
1865
1866
1867 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1868 sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1869 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1870 sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
1871 mutex_lock(&adev->srbm_mutex);
1872 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1873 cik_srbm_select(adev, 0, 0, 0, i);
1874
1875 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
1876 WREG32(mmSH_MEM_APE1_BASE, 1);
1877 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1878 WREG32(mmSH_MEM_BASES, sh_mem_bases);
1879 }
1880 cik_srbm_select(adev, 0, 0, 0, 0);
1881 mutex_unlock(&adev->srbm_mutex);
1882
1883
1884
1885 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1886 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
1887 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
1888 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
1889 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
1890 }
1891}
1892
1893static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
1894{
1895 int vmid;
1896
1897
1898
1899
1900
1901
1902
1903 for (vmid = 1; vmid < 16; vmid++) {
1904 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
1905 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
1906 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
1907 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
1908 }
1909}
1910
1911static void gfx_v7_0_config_init(struct amdgpu_device *adev)
1912{
1913 adev->gfx.config.double_offchip_lds_buf = 1;
1914}
1915
1916
1917
1918
1919
1920
1921
1922
1923
/**
 * gfx_v7_0_constants_init - program the gfx block's fixed register state
 *
 * @adev: amdgpu_device pointer
 *
 * Writes the address configuration registers, initializes the tiling
 * mode table, render backends, CU info and driver config, then programs
 * the SH_MEM_* registers for every VMID followed by a series of fixed
 * gfx register values.  Everything from the gfx_v7_0_select_se_sh() call
 * onwards runs with adev->grbm_idx_mutex held; the per-VMID loop also
 * holds adev->srbm_mutex.  The function ends with a 50us delay.
 */
static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
{
	u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
	u32 tmp;
	int i;

	/* READ_TIMEOUT field of GRBM_CNTL set to 0xff */
	WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));

	/* the same gb_addr_config value is written to GB, HDP and DMIF */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v7_0_tiling_mode_table_init(adev);

	gfx_v7_0_setup_rb(adev);
	gfx_v7_0_get_cu_info(adev);
	gfx_v7_0_config_init(adev);

	/* MEQ1 starts at 0x30, MEQ2 at 0x60 */
	WREG32(mmCP_MEQ_THRESHOLDS,
	       (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
	       (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * NOTE(review): passing 0xffffffff for all three selectors presumably
	 * selects broadcast to every SE/SH/instance so the writes below reach
	 * all of them - confirm against gfx_v7_0_select_se_sh().
	 */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/* build the SH_MEM_CONFIG value shared by all VMIDs */
	sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
				   SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
				   MTYPE_NC);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
				   MTYPE_UC);
	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);

	/* SH_STATIC_MEM_CONFIG is written once, outside the per-VMID loop */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		/* VMID 0 gets base 0; all others use the shared aperture */
		if (i == 0)
			sh_mem_base = 0;
		else
			sh_mem_base = adev->gmc.shared_aperture_start >> 48;
		/* select VMID i for the SH_MEM_* writes that follow */
		cik_srbm_select(adev, 0, 0, 0, i);
		/* APE1 base (1) above limit (0) - presumably disables APE1 */
		WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_base);
	}
	/* restore the default SRBM selection before dropping the lock */
	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v7_0_init_compute_vmid(adev);
	gfx_v7_0_init_gds_vmid(adev);

	WREG32(mmSX_DEBUG_1, 0x20);

	WREG32(mmTA_CNTL_AUX, 0x00010000);

	/* read-modify-write: set bits 24 and 25 of SPI_CONFIG_CNTL */
	tmp = RREG32(mmSPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(mmSPI_CONFIG_CNTL, tmp);

	/* NOTE(review): SQ_CONFIG is written with 1 here and with 0 below */
	WREG32(mmSQ_CONFIG, 1);

	WREG32(mmDB_DEBUG, 0);

	tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(mmDB_DEBUG2, tmp);

	tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(mmDB_DEBUG3, tmp);

	tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(mmCB_HW_CONTROL, tmp);

	WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));

	/* FIFO sizes come from the per-ASIC values cached in gfx.config */
	WREG32(mmPA_SC_FIFO_SIZE,
		((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		(adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));

	WREG32(mmVGT_NUM_INSTANCES, 1);

	WREG32(mmCP_PERFMON_CNTL, 0);

	WREG32(mmSQ_CONFIG, 0);

	WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
		((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
		(255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));

	WREG32(mmVGT_CACHE_INVALIDATION,
		(VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
		(ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));

	WREG32(mmVGT_GS_VERTEX_REUSE, 16);
	WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
			(3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
	WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);

	/* all four PIPE_ORDER_TSx fields of SPI_ARB_PRIORITY are set to 2 */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

	udelay(50);
}
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
/**
 * gfx_v7_0_scratch_init - set up the driver's scratch register bookkeeping
 *
 * @adev: amdgpu_device pointer
 *
 * Records that 8 scratch registers are available starting at
 * mmSCRATCH_REG0 and initializes the free mask with one bit set per
 * register.  Only driver state is written; no register is accessed.
 */
static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	/* low num_reg bits set: every scratch register starts out free */
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2088{
2089 struct amdgpu_device *adev = ring->adev;
2090 uint32_t scratch;
2091 uint32_t tmp = 0;
2092 unsigned i;
2093 int r;
2094
2095 r = amdgpu_gfx_scratch_get(adev, &scratch);
2096 if (r)
2097 return r;
2098
2099 WREG32(scratch, 0xCAFEDEAD);
2100 r = amdgpu_ring_alloc(ring, 3);
2101 if (r)
2102 goto error_free_scratch;
2103
2104 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2105 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2106 amdgpu_ring_write(ring, 0xDEADBEEF);
2107 amdgpu_ring_commit(ring);
2108
2109 for (i = 0; i < adev->usec_timeout; i++) {
2110 tmp = RREG32(scratch);
2111 if (tmp == 0xDEADBEEF)
2112 break;
2113 udelay(1);
2114 }
2115 if (i >= adev->usec_timeout)
2116 r = -ETIMEDOUT;
2117
2118error_free_scratch:
2119 amdgpu_gfx_scratch_free(adev, scratch);
2120 return r;
2121}
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
2132{
2133 u32 ref_and_mask;
2134 int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
2135
2136 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
2137 switch (ring->me) {
2138 case 1:
2139 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
2140 break;
2141 case 2:
2142 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
2143 break;
2144 default:
2145 return;
2146 }
2147 } else {
2148 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
2149 }
2150
2151 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
2152 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) |
2153 WAIT_REG_MEM_FUNCTION(3) |
2154 WAIT_REG_MEM_ENGINE(usepfp)));
2155 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
2156 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
2157 amdgpu_ring_write(ring, ref_and_mask);
2158 amdgpu_ring_write(ring, ref_and_mask);
2159 amdgpu_ring_write(ring, 0x20);
2160}
2161
2162static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
2163{
2164 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2165 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
2166 EVENT_INDEX(4));
2167
2168 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
2169 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
2170 EVENT_INDEX(0));
2171}
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
2183 u64 seq, unsigned flags)
2184{
2185 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2186 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2187
2188
2189
2190 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2191 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2192 EOP_TC_ACTION_EN |
2193 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2194 EVENT_INDEX(5)));
2195 amdgpu_ring_write(ring, addr & 0xfffffffc);
2196 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2197 DATA_SEL(1) | INT_SEL(0));
2198 amdgpu_ring_write(ring, lower_32_bits(seq - 1));
2199 amdgpu_ring_write(ring, upper_32_bits(seq - 1));
2200
2201
2202 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2203 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2204 EOP_TC_ACTION_EN |
2205 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2206 EVENT_INDEX(5)));
2207 amdgpu_ring_write(ring, addr & 0xfffffffc);
2208 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2209 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2210 amdgpu_ring_write(ring, lower_32_bits(seq));
2211 amdgpu_ring_write(ring, upper_32_bits(seq));
2212}
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2224 u64 addr, u64 seq,
2225 unsigned flags)
2226{
2227 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2228 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2229
2230
2231 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2232 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2233 EOP_TC_ACTION_EN |
2234 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2235 EVENT_INDEX(5)));
2236 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2237 amdgpu_ring_write(ring, addr & 0xfffffffc);
2238 amdgpu_ring_write(ring, upper_32_bits(addr));
2239 amdgpu_ring_write(ring, lower_32_bits(seq));
2240 amdgpu_ring_write(ring, upper_32_bits(seq));
2241}
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2259 struct amdgpu_job *job,
2260 struct amdgpu_ib *ib,
2261 uint32_t flags)
2262{
2263 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
2264 u32 header, control = 0;
2265
2266
2267 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2268 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2269 amdgpu_ring_write(ring, 0);
2270 }
2271
2272 if (ib->flags & AMDGPU_IB_FLAG_CE)
2273 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2274 else
2275 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2276
2277 control |= ib->length_dw | (vmid << 24);
2278
2279 amdgpu_ring_write(ring, header);
2280 amdgpu_ring_write(ring,
2281#ifdef __BIG_ENDIAN
2282 (2 << 0) |
2283#endif
2284 (ib->gpu_addr & 0xFFFFFFFC));
2285 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2286 amdgpu_ring_write(ring, control);
2287}
2288
2289static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2290 struct amdgpu_job *job,
2291 struct amdgpu_ib *ib,
2292 uint32_t flags)
2293{
2294 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
2295 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
2308 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2309 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
2310 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
2311 }
2312
2313 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
2314 amdgpu_ring_write(ring,
2315#ifdef __BIG_ENDIAN
2316 (2 << 0) |
2317#endif
2318 (ib->gpu_addr & 0xFFFFFFFC));
2319 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2320 amdgpu_ring_write(ring, control);
2321}
2322
2323static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
2324{
2325 uint32_t dw2 = 0;
2326
2327 dw2 |= 0x80000000;
2328 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2329 gfx_v7_0_ring_emit_vgt_flush(ring);
2330
2331 dw2 |= 0x8001;
2332
2333 dw2 |= 0x01000000;
2334
2335 dw2 |= 0x10002;
2336 }
2337
2338 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2339 amdgpu_ring_write(ring, dw2);
2340 amdgpu_ring_write(ring, 0);
2341}
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
2353{
2354 struct amdgpu_device *adev = ring->adev;
2355 struct amdgpu_ib ib;
2356 struct dma_fence *f = NULL;
2357 uint32_t scratch;
2358 uint32_t tmp = 0;
2359 long r;
2360
2361 r = amdgpu_gfx_scratch_get(adev, &scratch);
2362 if (r)
2363 return r;
2364
2365 WREG32(scratch, 0xCAFEDEAD);
2366 memset(&ib, 0, sizeof(ib));
2367 r = amdgpu_ib_get(adev, NULL, 256, &ib);
2368 if (r)
2369 goto err1;
2370
2371 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2372 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
2373 ib.ptr[2] = 0xDEADBEEF;
2374 ib.length_dw = 3;
2375
2376 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
2377 if (r)
2378 goto err2;
2379
2380 r = dma_fence_wait_timeout(f, false, timeout);
2381 if (r == 0) {
2382 r = -ETIMEDOUT;
2383 goto err2;
2384 } else if (r < 0) {
2385 goto err2;
2386 }
2387 tmp = RREG32(scratch);
2388 if (tmp == 0xDEADBEEF)
2389 r = 0;
2390 else
2391 r = -EINVAL;
2392
2393err2:
2394 amdgpu_ib_free(adev, &ib, NULL);
2395 dma_fence_put(f);
2396err1:
2397 amdgpu_gfx_scratch_free(adev, scratch);
2398 return r;
2399}
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2433{
2434 int i;
2435
2436 if (enable) {
2437 WREG32(mmCP_ME_CNTL, 0);
2438 } else {
2439 WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
2440 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2441 adev->gfx.gfx_ring[i].sched.ready = false;
2442 }
2443 udelay(50);
2444}
2445
2446
2447
2448
2449
2450
2451
2452
2453
/**
 * gfx_v7_0_cp_gfx_load_microcode - upload the gfx CP microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Records the version and feature version of the PFP, CE and ME firmware
 * images, halts the gfx CP, and streams each image dword by dword into
 * its ucode data register.
 *
 * Returns 0 on success, -EINVAL if any of the three firmware images has
 * not been loaded.
 */
static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	/* each firmware blob starts with a gfx_firmware_header_v1_0 */
	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
	/* header fields are little endian; cache them in CPU byte order */
	adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
	adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
	adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
	adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);

	/* halt the gfx CP before the ucode is written */
	gfx_v7_0_cp_gfx_enable(adev, false);

	/* PFP: reset the address, stream the payload, then write the version */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE: same sequence through the CE ucode registers */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME: same sequence through the ME RAM registers */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
/**
 * gfx_v7_0_cp_gfx_start - start the gfx CP and emit the initial state
 *
 * @adev: amdgpu_device pointer
 *
 * Programs CP_MAX_CONTEXT, CP_ENDIAN_SWAP and CP_DEVICE_ID, enables the
 * gfx CP, and submits on gfx ring 0 a SET_BASE packet, the clear-state
 * preamble built from adev->gfx.rlc.cs_data, the raster configuration, a
 * CLEAR_STATE packet and one trailing SET_CONTEXT_REG packet.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* basic CP setup before the engines are released */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v7_0_cp_gfx_enable(adev, true);

	/*
	 * The extra 8 dwords match the SET_BASE packet (4) and the final
	 * SET_CONTEXT_REG packet (4) below; the rest is presumably what
	 * gfx_v7_0_get_csb_size() accounts for - TODO confirm.
	 */
	r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* set the CE partition base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	/* open the clear-state preamble */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent; other section ids are skipped */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
						  PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config taken from SE0/SH0 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	/* close the clear-state preamble */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/*
	 * Two consecutive context registers starting at offset 0x316 get
	 * 0xe and 0x10.
	 * NOTE(review): the register names are not visible here - confirm.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, 0x00000316);
	amdgpu_ring_write(ring, 0x0000000e);
	amdgpu_ring_write(ring, 0x00000010);

	amdgpu_ring_commit(ring);

	return 0;
}
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2600{
2601 struct amdgpu_ring *ring;
2602 u32 tmp;
2603 u32 rb_bufsz;
2604 u64 rb_addr, rptr_addr;
2605 int r;
2606
2607 WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
2608 if (adev->asic_type != CHIP_HAWAII)
2609 WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2610
2611
2612 WREG32(mmCP_RB_WPTR_DELAY, 0);
2613
2614
2615 WREG32(mmCP_RB_VMID, 0);
2616
2617 WREG32(mmSCRATCH_ADDR, 0);
2618
2619
2620
2621 ring = &adev->gfx.gfx_ring[0];
2622 rb_bufsz = order_base_2(ring->ring_size / 8);
2623 tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2624#ifdef __BIG_ENDIAN
2625 tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
2626#endif
2627 WREG32(mmCP_RB0_CNTL, tmp);
2628
2629
2630 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2631 ring->wptr = 0;
2632 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2633
2634
2635 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2636 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2637 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2638
2639
2640 WREG32(mmSCRATCH_UMSK, 0);
2641
2642 mdelay(1);
2643 WREG32(mmCP_RB0_CNTL, tmp);
2644
2645 rb_addr = ring->gpu_addr >> 8;
2646 WREG32(mmCP_RB0_BASE, rb_addr);
2647 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2648
2649
2650 gfx_v7_0_cp_gfx_start(adev);
2651 r = amdgpu_ring_test_helper(ring);
2652 if (r)
2653 return r;
2654
2655 return 0;
2656}
2657
2658static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
2659{
2660 return ring->adev->wb.wb[ring->rptr_offs];
2661}
2662
/**
 * gfx_v7_0_ring_get_wptr_gfx - get the gfx ring's write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current value of the CP_RB0_WPTR register.
 */
static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	/* adev is used implicitly by the RREG32() macro */
	struct amdgpu_device *adev = ring->adev;

	return RREG32(mmCP_RB0_WPTR);
}
2669
/**
 * gfx_v7_0_ring_set_wptr_gfx - commit the gfx ring's write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Writes the low 32 bits of ring->wptr to CP_RB0_WPTR and reads the
 * register back, discarding the result.
 */
static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	/* adev is used implicitly by the WREG32()/RREG32() macros */
	struct amdgpu_device *adev = ring->adev;

	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	/* NOTE(review): read-back presumably flushes the posted write - confirm */
	(void)RREG32(mmCP_RB0_WPTR);
}
2677
2678static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
2679{
2680
2681 return ring->adev->wb.wb[ring->wptr_offs];
2682}
2683
2684static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
2685{
2686 struct amdgpu_device *adev = ring->adev;
2687
2688
2689 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
2690 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2691}
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2702{
2703 int i;
2704
2705 if (enable) {
2706 WREG32(mmCP_MEC_CNTL, 0);
2707 } else {
2708 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2709 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2710 adev->gfx.compute_ring[i].sched.ready = false;
2711 }
2712 udelay(50);
2713}
2714
2715
2716
2717
2718
2719
2720
2721
2722
/**
 * gfx_v7_0_cp_compute_load_microcode - upload the compute MEC microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Records the MEC firmware versions, halts the compute engines and
 * streams the MEC image into the ME1 ucode registers.  On Kaveri the
 * separate MEC2 image is additionally streamed into the ME2 ucode
 * registers.
 *
 * Returns 0 on success, -EINVAL if a required firmware image is missing.
 */
static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
	adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(
					mec_hdr->ucode_feature_version);

	/* halt the compute engines before the ucode is written */
	gfx_v7_0_cp_compute_enable(adev, false);

	/* MEC1: reset the address, stream the payload, reset the address */
	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);

	/* only Kaveri loads a second image, into the ME2 registers */
	if (adev->asic_type == CHIP_KAVERI) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		if (!adev->gfx.mec2_fw)
			return -EINVAL;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
		adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version = le32_to_cpu(
				mec2_hdr->ucode_feature_version);

		/* MEC2: same sequence through the ME2 ucode registers */
		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
2785{
2786 int i;
2787
2788 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2789 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2790
2791 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
2792 }
2793}
2794
/**
 * gfx_v7_0_mec_fini - free the MEC HPD EOP buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Releases adev->gfx.mec.hpd_eop_obj via amdgpu_bo_free_kernel().
 */
static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
2799
2800static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
2801{
2802 int r;
2803 u32 *hpd;
2804 size_t mec_hpd_size;
2805
2806 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2807
2808
2809 amdgpu_gfx_compute_queue_acquire(adev);
2810
2811
2812 mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
2813 * GFX7_MEC_HPD_SIZE * 2;
2814
2815 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2816 AMDGPU_GEM_DOMAIN_VRAM,
2817 &adev->gfx.mec.hpd_eop_obj,
2818 &adev->gfx.mec.hpd_eop_gpu_addr,
2819 (void **)&hpd);
2820 if (r) {
2821 dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
2822 gfx_v7_0_mec_fini(adev);
2823 return r;
2824 }
2825
2826
2827 memset(hpd, 0, mec_hpd_size);
2828
2829 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2830 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2831
2832 return 0;
2833}
2834
/*
 * Snapshot layout for the CP MQD/HQD register set; each field is named
 * after the register it corresponds to.
 * NOTE(review): nothing in the reviewed portion of this file references
 * this struct - check for users elsewhere before changing it.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
2873
/**
 * gfx_v7_0_compute_pipe_init - program the EOP buffer of one compute pipe
 *
 * @adev: amdgpu_device pointer
 * @mec: zero-based MEC index
 * @pipe: pipe index within the MEC
 *
 * With adev->srbm_mutex held, selects the pipe through SRBM and programs
 * its HPD EOP base address, VMID and size from the pipe's slice of the
 * HPD EOP buffer.
 */
static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
				       int mec, int pipe)
{
	u64 eop_gpu_addr;
	u32 tmp;
	/* each pipe owns 2 * GFX7_MEC_HPD_SIZE bytes, laid out MEC-major */
	size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
			    * GFX7_MEC_HPD_SIZE * 2;

	mutex_lock(&adev->srbm_mutex);
	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;

	/* the SRBM micro engine index is the MEC index plus one */
	cik_srbm_select(adev, mec + 1, pipe, 0, 0);

	/* base address is programmed in 256-byte units */
	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* the EOP buffer is accessed with VMID 0 */
	WREG32(mmCP_HPD_EOP_VMID, 0);

	/* replace only the EOP_SIZE field, keeping the other bits */
	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
	WREG32(mmCP_HPD_EOP_CONTROL, tmp);

	/* restore the default SRBM selection before dropping the lock */
	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2903
2904static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
2905{
2906 int i;
2907
2908
2909 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
2910 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
2911 for (i = 0; i < adev->usec_timeout; i++) {
2912 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
2913 break;
2914 udelay(1);
2915 }
2916
2917 if (i == adev->usec_timeout)
2918 return -ETIMEDOUT;
2919
2920 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
2921 WREG32(mmCP_HQD_PQ_RPTR, 0);
2922 WREG32(mmCP_HQD_PQ_WPTR, 0);
2923 }
2924
2925 return 0;
2926}
2927
/**
 * gfx_v7_0_mqd_init - fill in the memory queue descriptor for a compute ring
 *
 * @adev: amdgpu_device pointer
 * @mqd: CPU pointer to the MQD to initialize
 * @mqd_gpu_addr: GPU address of @mqd
 * @ring: compute ring the MQD describes
 *
 * Zeroes @mqd and fills it from @ring's properties and from the current
 * contents of the selected queue's HQD registers.  Nothing is written to
 * the hardware here; as a side effect ring->wptr is reset to 0.  The
 * register reads act on whichever queue the caller has selected.
 */
static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
			     struct cik_mqd *mqd,
			     uint64_t mqd_gpu_addr,
			     struct amdgpu_ring *ring)
{
	u64 hqd_gpu_addr;
	u64 wb_gpu_addr;

	/* start from a clean descriptor */
	memset(mqd, 0, sizeof(struct cik_mqd));

	/* all four static thread management masks are fully set */
	mqd->header = 0xC0310800;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;

	/*
	 * Doorbell enable bit follows ring->use_doorbell.
	 * NOTE(review): cp_hqd_pq_doorbell_control is assigned again further
	 * down in both branches, so this value is overwritten.
	 */
	mqd->cp_hqd_pq_doorbell_control =
		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
	if (ring->use_doorbell)
		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
	else
		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;

	/* GPU address of the MQD itself */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* MQD control with the VMID field cleared */
	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;

	/* ring buffer base, in 256-byte units */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* queue control: clear the size fields, then set them from the ring */
	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
	mqd->cp_hqd_pq_control &=
		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);

	mqd->cp_hqd_pq_control |=
		order_base_2(ring->ring_size / 8);
	/* presumably the RPTR_BLOCK_SIZE field, at bit 8 - TODO confirm */
	mqd->cp_hqd_pq_control |=
		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
	mqd->cp_hqd_pq_control |=
		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
#endif
	mqd->cp_hqd_pq_control &=
		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
	mqd->cp_hqd_pq_control |=
		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK;

	/* wptr poll address: the ring's wptr slot in the writeback buffer */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* rptr report address: the ring's rptr slot in the writeback buffer */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* final doorbell control: offset and enable, or 0 without doorbell */
	if (ring->use_doorbell) {
		mqd->cp_hqd_pq_doorbell_control =
			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		mqd->cp_hqd_pq_doorbell_control &=
			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
		mqd->cp_hqd_pq_doorbell_control |=
			(ring->doorbell_index <<
			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
		mqd->cp_hqd_pq_doorbell_control |=
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
		mqd->cp_hqd_pq_doorbell_control &=
			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);

	} else {
		mqd->cp_hqd_pq_doorbell_control = 0;
	}

	/* write pointer restarts at 0; read pointer is taken from hardware */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* the queue runs with VMID 0 */
	mqd->cp_hqd_vmid = 0;

	/*
	 * Remaining fields mirror the current hardware values.
	 * NOTE(review): cp_hqd_pq_rptr is read a second time in this group.
	 */
	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);

	/* mark the queue active in the descriptor */
	mqd->cp_hqd_active = 1;
}
3047
/**
 * gfx_v7_0_mqd_commit - write an MQD's contents to the selected HQD registers
 *
 * @adev: amdgpu_device pointer
 * @mqd: memory queue descriptor to commit
 *
 * Disables wptr polling, then copies @mqd into the register range
 * mmCP_MQD_BASE_ADDR..mmCP_MQD_CONTROL in two passes so that
 * mmCP_HQD_ACTIVE is the last register written.  The writes act on
 * whichever queue the caller has selected.
 *
 * Always returns 0.
 */
int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
{
	uint32_t tmp;
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/*
	 * mqd_data[reg - mmCP_MQD_BASE_ADDR] is the value for register reg.
	 * NOTE(review): this relies on struct cik_mqd laying out its fields
	 * from cp_mqd_base_addr_lo onwards in register order - confirm
	 * against cik_structs.h.
	 */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* clear the EN field of CP_PQ_WPTR_POLL_CNTL */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	/* first pass: everything from CP_HQD_VMID up to CP_MQD_CONTROL */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* second pass: MQD base address registers, ending with CP_HQD_ACTIVE */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
3072
/**
 * gfx_v7_0_compute_queue_init - create and commit the MQD for one compute ring
 *
 * @adev: amdgpu_device pointer
 * @ring_id: index into adev->gfx.compute_ring
 *
 * Allocates a GTT buffer for the ring's MQD, then, with adev->srbm_mutex
 * held and the ring's queue selected, initializes the MQD, deactivates
 * the hardware queue and commits the MQD to the HQD registers.
 *
 * Returns 0 on success or the error from amdgpu_bo_create_reserved().
 */
static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
{
	int r;
	u64 mqd_gpu_addr;
	struct cik_mqd *mqd;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				      &mqd_gpu_addr, (void **)&mqd);
	if (r) {
		dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
		return r;
	}

	/* HQD register accesses below target this ring's me/pipe/queue */
	mutex_lock(&adev->srbm_mutex);
	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
	/*
	 * NOTE(review): the return values of gfx_v7_0_mqd_deactivate() (which
	 * can be -ETIMEDOUT) and gfx_v7_0_mqd_commit() are ignored here.
	 */
	gfx_v7_0_mqd_deactivate(adev);
	gfx_v7_0_mqd_commit(adev, mqd);

	/* restore the default SRBM selection before dropping the lock */
	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* drop the CPU mapping and reservation; the BO itself is kept */
	amdgpu_bo_kunmap(ring->mqd_obj);
	amdgpu_bo_unreserve(ring->mqd_obj);
	return 0;
}
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
3113{
3114 int r, i, j;
3115 u32 tmp;
3116 struct amdgpu_ring *ring;
3117
3118
3119 tmp = RREG32(mmCP_CPF_DEBUG);
3120 tmp |= (1 << 23);
3121 WREG32(mmCP_CPF_DEBUG, tmp);
3122
3123
3124 for (i = 0; i < adev->gfx.mec.num_mec; i++)
3125 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
3126 gfx_v7_0_compute_pipe_init(adev, i, j);
3127
3128
3129 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3130 r = gfx_v7_0_compute_queue_init(adev, i);
3131 if (r) {
3132 gfx_v7_0_cp_compute_fini(adev);
3133 return r;
3134 }
3135 }
3136
3137 gfx_v7_0_cp_compute_enable(adev, true);
3138
3139 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3140 ring = &adev->gfx.compute_ring[i];
3141 amdgpu_ring_test_helper(ring);
3142 }
3143
3144 return 0;
3145}
3146
/**
 * gfx_v7_0_cp_enable - enable or disable both command processors
 * @adev: amdgpu device pointer
 * @enable: requested state, forwarded unchanged
 *
 * Forwards @enable to the gfx CP first and then to the compute CP.
 */
static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v7_0_cp_gfx_enable(adev, enable);
	gfx_v7_0_cp_compute_enable(adev, enable);
}
3152
/**
 * gfx_v7_0_cp_load_microcode - load the gfx and compute CP microcode
 * @adev: amdgpu device pointer
 *
 * Loads the gfx CP microcode and, only if that succeeded, the compute CP
 * microcode.
 *
 * Returns 0 on success or the error code of the first loader that failed.
 */
static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
{
	int ret = gfx_v7_0_cp_gfx_load_microcode(adev);

	if (ret)
		return ret;

	return gfx_v7_0_cp_compute_load_microcode(adev);
}
3166
3167static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3168 bool enable)
3169{
3170 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3171
3172 if (enable)
3173 tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3174 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3175 else
3176 tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3177 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3178 WREG32(mmCP_INT_CNTL_RING0, tmp);
3179}
3180
3181static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
3182{
3183 int r;
3184
3185 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3186
3187 r = gfx_v7_0_cp_load_microcode(adev);
3188 if (r)
3189 return r;
3190
3191 r = gfx_v7_0_cp_gfx_resume(adev);
3192 if (r)
3193 return r;
3194 r = gfx_v7_0_cp_compute_resume(adev);
3195 if (r)
3196 return r;
3197
3198 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3199
3200 return 0;
3201}
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3212{
3213 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3214 uint32_t seq = ring->fence_drv.sync_seq;
3215 uint64_t addr = ring->fence_drv.gpu_addr;
3216
3217 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3218 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) |
3219 WAIT_REG_MEM_FUNCTION(3) |
3220 WAIT_REG_MEM_ENGINE(usepfp)));
3221 amdgpu_ring_write(ring, addr & 0xfffffffc);
3222 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3223 amdgpu_ring_write(ring, seq);
3224 amdgpu_ring_write(ring, 0xffffffff);
3225 amdgpu_ring_write(ring, 4);
3226
3227 if (usepfp) {
3228
3229 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3230 amdgpu_ring_write(ring, 0);
3231 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3232 amdgpu_ring_write(ring, 0);
3233 }
3234}
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3251 unsigned vmid, uint64_t pd_addr)
3252{
3253 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3254
3255 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3256
3257
3258 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3259 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |
3260 WAIT_REG_MEM_FUNCTION(0) |
3261 WAIT_REG_MEM_ENGINE(0)));
3262 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3263 amdgpu_ring_write(ring, 0);
3264 amdgpu_ring_write(ring, 0);
3265 amdgpu_ring_write(ring, 0);
3266 amdgpu_ring_write(ring, 0x20);
3267
3268
3269 if (usepfp) {
3270
3271 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3272 amdgpu_ring_write(ring, 0x0);
3273
3274
3275 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3276 amdgpu_ring_write(ring, 0);
3277 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3278 amdgpu_ring_write(ring, 0);
3279 }
3280}
3281
3282static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
3283 uint32_t reg, uint32_t val)
3284{
3285 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3286
3287 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3288 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3289 WRITE_DATA_DST_SEL(0)));
3290 amdgpu_ring_write(ring, reg);
3291 amdgpu_ring_write(ring, 0);
3292 amdgpu_ring_write(ring, val);
3293}
3294
3295
3296
3297
3298
3299
3300static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
3301{
3302 const u32 *src_ptr;
3303 u32 dws;
3304 const struct cs_section_def *cs_data;
3305 int r;
3306
3307
3308 if (adev->flags & AMD_IS_APU) {
3309 if (adev->asic_type == CHIP_KAVERI) {
3310 adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
3311 adev->gfx.rlc.reg_list_size =
3312 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
3313 } else {
3314 adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
3315 adev->gfx.rlc.reg_list_size =
3316 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
3317 }
3318 }
3319 adev->gfx.rlc.cs_data = ci_cs_data;
3320 adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048);
3321 adev->gfx.rlc.cp_table_size += 64 * 1024;
3322
3323 src_ptr = adev->gfx.rlc.reg_list;
3324 dws = adev->gfx.rlc.reg_list_size;
3325 dws += (5 * 16) + 48 + 48 + 64;
3326
3327 cs_data = adev->gfx.rlc.cs_data;
3328
3329 if (src_ptr) {
3330
3331 r = amdgpu_gfx_rlc_init_sr(adev, dws);
3332 if (r)
3333 return r;
3334 }
3335
3336 if (cs_data) {
3337
3338 r = amdgpu_gfx_rlc_init_csb(adev);
3339 if (r)
3340 return r;
3341 }
3342
3343 if (adev->gfx.rlc.cp_table_size) {
3344 r = amdgpu_gfx_rlc_init_cpt(adev);
3345 if (r)
3346 return r;
3347 }
3348
3349
3350 if (adev->gfx.rlc.funcs->update_spm_vmid)
3351 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
3352
3353 return 0;
3354}
3355
3356static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3357{
3358 u32 tmp;
3359
3360 tmp = RREG32(mmRLC_LB_CNTL);
3361 if (enable)
3362 tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3363 else
3364 tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3365 WREG32(mmRLC_LB_CNTL, tmp);
3366}
3367
3368static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3369{
3370 u32 i, j, k;
3371 u32 mask;
3372
3373 mutex_lock(&adev->grbm_idx_mutex);
3374 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3375 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3376 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
3377 for (k = 0; k < adev->usec_timeout; k++) {
3378 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3379 break;
3380 udelay(1);
3381 }
3382 }
3383 }
3384 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3385 mutex_unlock(&adev->grbm_idx_mutex);
3386
3387 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3388 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3389 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3390 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3391 for (k = 0; k < adev->usec_timeout; k++) {
3392 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3393 break;
3394 udelay(1);
3395 }
3396}
3397
3398static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3399{
3400 u32 tmp;
3401
3402 tmp = RREG32(mmRLC_CNTL);
3403 if (tmp != rlc)
3404 WREG32(mmRLC_CNTL, rlc);
3405}
3406
3407static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
3408{
3409 u32 data, orig;
3410
3411 orig = data = RREG32(mmRLC_CNTL);
3412
3413 if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
3414 u32 i;
3415
3416 data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
3417 WREG32(mmRLC_CNTL, data);
3418
3419 for (i = 0; i < adev->usec_timeout; i++) {
3420 if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
3421 break;
3422 udelay(1);
3423 }
3424
3425 gfx_v7_0_wait_for_rlc_serdes(adev);
3426 }
3427
3428 return orig;
3429}
3430
/**
 * gfx_v7_0_is_rlc_enabled - report whether the RLC is enabled
 * @adev: amdgpu device pointer (unused)
 *
 * Always returns true; no hardware state is read.
 */
static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
{
	return true;
}
3435
3436static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
3437{
3438 u32 tmp, i, mask;
3439
3440 tmp = 0x1 | (1 << 1);
3441 WREG32(mmRLC_GPR_REG2, tmp);
3442
3443 mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
3444 RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
3445 for (i = 0; i < adev->usec_timeout; i++) {
3446 if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
3447 break;
3448 udelay(1);
3449 }
3450
3451 for (i = 0; i < adev->usec_timeout; i++) {
3452 if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
3453 break;
3454 udelay(1);
3455 }
3456}
3457
3458static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
3459{
3460 u32 tmp;
3461
3462 tmp = 0x1 | (0 << 1);
3463 WREG32(mmRLC_GPR_REG2, tmp);
3464}
3465
3466
3467
3468
3469
3470
3471
3472
/**
 * gfx_v7_0_rlc_stop - stop the RLC
 * @adev: amdgpu device pointer
 *
 * Writes 0 to RLC_CNTL, disables the context busy/empty interrupts and waits
 * for the RLC serdes busy registers to clear.
 */
static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32(mmRLC_CNTL, 0);

	gfx_v7_0_enable_gui_idle_interrupt(adev, false);

	gfx_v7_0_wait_for_rlc_serdes(adev);
}
3481
3482
3483
3484
3485
3486
3487
3488
/**
 * gfx_v7_0_rlc_start - start the RLC
 * @adev: amdgpu device pointer
 *
 * Writes RLC_CNTL with only RLC_ENABLE_F32 set, enables the context
 * busy/empty interrupts and then delays for 50 microseconds.
 */
static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);

	gfx_v7_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
3497
3498static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
3499{
3500 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3501
3502 tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3503 WREG32(mmGRBM_SOFT_RESET, tmp);
3504 udelay(50);
3505 tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
3506 WREG32(mmGRBM_SOFT_RESET, tmp);
3507 udelay(50);
3508}
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
/**
 * gfx_v7_0_rlc_resume - program the RLC, upload its microcode and start it
 * @adev: amdgpu device pointer
 *
 * Records the firmware and feature version from the RLC firmware header,
 * stops and resets the RLC, sets up power gating, programs the load
 * balancing registers, streams the microcode into RLC_GPM_UCODE_DATA and
 * starts the RLC again through the rlc.funcs callbacks.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware is loaded.
 */
static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	u32 tmp;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);
	adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(
					hdr->ucode_feature_version);

	adev->gfx.rlc.funcs->stop(adev);

	/* clear the two low bits of RLC_CGCG_CGLS_CTRL */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);

	adev->gfx.rlc.funcs->reset(adev);

	gfx_v7_0_init_pg(adev);

	WREG32(mmRLC_LB_CNTR_INIT, 0);
	WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);

	/* NOTE(review): 0xffffffff presumably selects all SEs/SHs - confirm */
	mutex_lock(&adev->grbm_idx_mutex);
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(mmRLC_LB_PARAMS, 0x00600408);
	WREG32(mmRLC_LB_CNTL, 0x80000004);
	mutex_unlock(&adev->grbm_idx_mutex);

	WREG32(mmRLC_MC_CNTL, 0);
	WREG32(mmRLC_UCODE_CNTL, 0);

	/* microcode payload location and length (in dwords) come from the header */
	fw_data = (const __le32 *)
		(adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	/* after the upload the address register is written with the fw version */
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	/* load balancing is left disabled */
	gfx_v7_0_enable_lbpw(adev, false);

	if (adev->asic_type == CHIP_BONAIRE)
		WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
3576
3577static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
3578{
3579 u32 data;
3580
3581 data = RREG32(mmRLC_SPM_VMID);
3582
3583 data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
3584 data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
3585
3586 WREG32(mmRLC_SPM_VMID, data);
3587}
3588
/**
 * gfx_v7_0_enable_cgcg - enable or disable CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable; only honoured when cg_flags has
 *          AMD_CG_SUPPORT_GFX_CGCG, otherwise the disable path runs
 *
 * Enable path: turns the context busy/empty interrupts on, halts the RLC,
 * sends a serdes write (BPM_ADDR | CGCG_OVERRIDE_0 | CGLS_ENABLE) to all
 * CU and non-CU masters, restores RLC_CNTL and sets CGCG_EN and CGLS_EN in
 * RLC_CGCG_CGLS_CTRL.
 *
 * Disable path: clears CGCG_EN and CGLS_EN with the context busy/empty
 * interrupts turned off around the update.
 */
static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		gfx_v7_0_enable_gui_idle_interrupt(adev, true);

		/* tmp keeps the previous RLC_CNTL so it can be restored below */
		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
			RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
			RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&adev->grbm_idx_mutex);

		gfx_v7_0_update_rlc(adev, tmp);

		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		if (orig != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

	} else {
		gfx_v7_0_enable_gui_idle_interrupt(adev, false);

		/* four reads whose results are discarded */
		/* NOTE(review): presumably a settle delay - confirm */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (orig != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
	}
}
3631
/**
 * gfx_v7_0_enable_mgcg - enable or disable MGCG and related light sleep
 * @adev: amdgpu device pointer
 * @enable: true to enable; only honoured when cg_flags has
 *          AMD_CG_SUPPORT_GFX_MGCG, otherwise the disable path runs
 *
 * Enable path: optionally enables CP memory light sleep (MGLS and CP_LS
 * flags), sets bit 0 and clears bit 1 of RLC_CGTT_MGCG_OVERRIDE, sends a
 * serdes write (BPM_ADDR | MGCG_OVERRIDE_0) with the RLC halted and, when
 * the CGTS flag is set, programs CGTS_SM_CTRL_REG.
 *
 * Disable path: sets bits 0 and 1 of RLC_CGTT_MGCG_OVERRIDE, clears the RLC
 * and CP memory light sleep enables, sets the CGTS overrides and sends a
 * serdes write (BPM_ADDR | MGCG_OVERRIDE_1) with the RLC halted.
 */
static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (orig != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* bit 0 set, bit 1 cleared */
		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* tmp keeps the previous RLC_CNTL so it can be restored below */
		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
			RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, data);
		mutex_unlock(&adev->grbm_idx_mutex);

		gfx_v7_0_update_rlc(adev, tmp);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* the LS override is only dropped when both MGLS and CGTS_LS are supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (orig != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
	} else {
		/* bits 0 and 1 set */
		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		if (orig != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* tmp keeps the previous RLC_CNTL so it can be restored below */
		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, data);
		mutex_unlock(&adev->grbm_idx_mutex);

		gfx_v7_0_update_rlc(adev, tmp);
	}
}
3716
/**
 * gfx_v7_0_update_cg - enable or disable gfx clock gating
 * @adev: amdgpu device pointer
 * @enable: true to enable MGCG then CGCG, false to disable CGCG then MGCG
 *
 * The context busy/empty interrupts are turned off before the update and
 * turned back on afterwards.
 */
static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
			       bool enable)
{
	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
	/* enabling goes MGCG -> CGCG, disabling goes the other way round */
	if (enable) {
		gfx_v7_0_enable_mgcg(adev, true);
		gfx_v7_0_enable_cgcg(adev, true);
	} else {
		gfx_v7_0_enable_cgcg(adev, false);
		gfx_v7_0_enable_mgcg(adev, false);
	}
	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
}
3731
3732static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
3733 bool enable)
3734{
3735 u32 data, orig;
3736
3737 orig = data = RREG32(mmRLC_PG_CNTL);
3738 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3739 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3740 else
3741 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3742 if (orig != data)
3743 WREG32(mmRLC_PG_CNTL, data);
3744}
3745
3746static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
3747 bool enable)
3748{
3749 u32 data, orig;
3750
3751 orig = data = RREG32(mmRLC_PG_CNTL);
3752 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3753 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3754 else
3755 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3756 if (orig != data)
3757 WREG32(mmRLC_PG_CNTL, data);
3758}
3759
3760static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
3761{
3762 u32 data, orig;
3763
3764 orig = data = RREG32(mmRLC_PG_CNTL);
3765 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
3766 data &= ~0x8000;
3767 else
3768 data |= 0x8000;
3769 if (orig != data)
3770 WREG32(mmRLC_PG_CNTL, data);
3771}
3772
3773static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
3774{
3775 u32 data, orig;
3776
3777 orig = data = RREG32(mmRLC_PG_CNTL);
3778 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
3779 data &= ~0x2000;
3780 else
3781 data |= 0x2000;
3782 if (orig != data)
3783 WREG32(mmRLC_PG_CNTL, data);
3784}
3785
3786static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
3787{
3788 if (adev->asic_type == CHIP_KAVERI)
3789 return 5;
3790 else
3791 return 4;
3792}
3793
3794static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
3795 bool enable)
3796{
3797 u32 data, orig;
3798
3799 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
3800 orig = data = RREG32(mmRLC_PG_CNTL);
3801 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3802 if (orig != data)
3803 WREG32(mmRLC_PG_CNTL, data);
3804
3805 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3806 data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3807 if (orig != data)
3808 WREG32(mmRLC_AUTO_PG_CTRL, data);
3809 } else {
3810 orig = data = RREG32(mmRLC_PG_CNTL);
3811 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
3812 if (orig != data)
3813 WREG32(mmRLC_PG_CNTL, data);
3814
3815 orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
3816 data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
3817 if (orig != data)
3818 WREG32(mmRLC_AUTO_PG_CTRL, data);
3819
3820 data = RREG32(mmDB_RENDER_CONTROL);
3821 }
3822}
3823
3824static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
3825 u32 bitmap)
3826{
3827 u32 data;
3828
3829 if (!bitmap)
3830 return;
3831
3832 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3833 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3834
3835 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
3836}
3837
3838static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3839{
3840 u32 data, mask;
3841
3842 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
3843 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
3844
3845 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3846 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3847
3848 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
3849
3850 return (~data) & mask;
3851}
3852
3853static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
3854{
3855 u32 tmp;
3856
3857 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3858
3859 tmp = RREG32(mmRLC_MAX_PG_CU);
3860 tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
3861 tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
3862 WREG32(mmRLC_MAX_PG_CU, tmp);
3863}
3864
3865static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
3866 bool enable)
3867{
3868 u32 data, orig;
3869
3870 orig = data = RREG32(mmRLC_PG_CNTL);
3871 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
3872 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3873 else
3874 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3875 if (orig != data)
3876 WREG32(mmRLC_PG_CNTL, data);
3877}
3878
3879static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
3880 bool enable)
3881{
3882 u32 data, orig;
3883
3884 orig = data = RREG32(mmRLC_PG_CNTL);
3885 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
3886 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3887 else
3888 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3889 if (orig != data)
3890 WREG32(mmRLC_PG_CNTL, data);
3891}
3892
/*
 * Values written to RLC_GPM_SCRATCH_ADDR by gfx_v7_0_init_gfx_cgpg() before
 * it streams the save/restore register list and the clear state descriptor
 * through RLC_GPM_SCRATCH_DATA.
 */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
3895
/**
 * gfx_v7_0_init_gfx_cgpg - program the RLC for gfx power gating
 * @adev: amdgpu device pointer
 *
 * Writes the clear state descriptor (or three zero dwords when there is no
 * clear state data) and, if present, the save/restore register list into the
 * RLC scratch space.  Then sets GFX_POWER_GATING_SRC, programs the
 * save/restore and jump table base registers and the idle poll count, delay
 * and idle threshold values.
 */
static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
{
	u32 data, orig;
	u32 i;

	if (adev->gfx.rlc.cs_data) {
		/* descriptor: address high dword, address low dword, size */
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
		WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
		WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
	} else {
		/* no clear state data: the three descriptor dwords are zeroed */
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
	}
	if (adev->gfx.rlc.reg_list) {
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
			WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
	}

	orig = data = RREG32(mmRLC_PG_CNTL);
	data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
	if (orig != data)
		WREG32(mmRLC_PG_CNTL, data);

	/* both base registers take the GPU address shifted right by 8 */
	WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
	WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);

	/* idle poll count = 0x60 */
	data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
	data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
	data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
	WREG32(mmCP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(mmRLC_PG_DELAY, data);

	/* low byte of RLC_PG_DELAY_2 = 0x3 */
	data = RREG32(mmRLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(mmRLC_PG_DELAY_2, data);

	/* GRBM register save idle threshold = 0x700 */
	data = RREG32(mmRLC_AUTO_PG_CTRL);
	data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
	data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
	WREG32(mmRLC_AUTO_PG_CTRL, data);

}
3944
/**
 * gfx_v7_0_update_gfx_pg - enable or disable all gfx power gating modes
 * @adev: amdgpu device pointer
 * @enable: requested state, forwarded unchanged
 *
 * Forwards @enable to the CGPG, static MGPG and dynamic MGPG helpers, in
 * that order.  Each helper checks its own pg_flags support bit.
 */
static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
{
	gfx_v7_0_enable_gfx_cgpg(adev, enable);
	gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
	gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
}
3951
3952static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
3953{
3954 u32 count = 0;
3955 const struct cs_section_def *sect = NULL;
3956 const struct cs_extent_def *ext = NULL;
3957
3958 if (adev->gfx.rlc.cs_data == NULL)
3959 return 0;
3960
3961
3962 count += 2;
3963
3964 count += 3;
3965
3966 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
3967 for (ext = sect->section; ext->extent != NULL; ++ext) {
3968 if (sect->id == SECT_CONTEXT)
3969 count += 2 + ext->reg_count;
3970 else
3971 return 0;
3972 }
3973 }
3974
3975 count += 4;
3976
3977 count += 2;
3978
3979 count += 2;
3980
3981 return count;
3982}
3983
3984static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
3985 volatile u32 *buffer)
3986{
3987 u32 count = 0, i;
3988 const struct cs_section_def *sect = NULL;
3989 const struct cs_extent_def *ext = NULL;
3990
3991 if (adev->gfx.rlc.cs_data == NULL)
3992 return;
3993 if (buffer == NULL)
3994 return;
3995
3996 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998
3999 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000 buffer[count++] = cpu_to_le32(0x80000000);
4001 buffer[count++] = cpu_to_le32(0x80000000);
4002
4003 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4004 for (ext = sect->section; ext->extent != NULL; ++ext) {
4005 if (sect->id == SECT_CONTEXT) {
4006 buffer[count++] =
4007 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
4008 buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4009 for (i = 0; i < ext->reg_count; i++)
4010 buffer[count++] = cpu_to_le32(ext->extent[i]);
4011 } else {
4012 return;
4013 }
4014 }
4015 }
4016
4017 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4018 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4019 switch (adev->asic_type) {
4020 case CHIP_BONAIRE:
4021 buffer[count++] = cpu_to_le32(0x16000012);
4022 buffer[count++] = cpu_to_le32(0x00000000);
4023 break;
4024 case CHIP_KAVERI:
4025 buffer[count++] = cpu_to_le32(0x00000000);
4026 buffer[count++] = cpu_to_le32(0x00000000);
4027 break;
4028 case CHIP_KABINI:
4029 case CHIP_MULLINS:
4030 buffer[count++] = cpu_to_le32(0x00000000);
4031 buffer[count++] = cpu_to_le32(0x00000000);
4032 break;
4033 case CHIP_HAWAII:
4034 buffer[count++] = cpu_to_le32(0x3a00161a);
4035 buffer[count++] = cpu_to_le32(0x0000002e);
4036 break;
4037 default:
4038 buffer[count++] = cpu_to_le32(0x00000000);
4039 buffer[count++] = cpu_to_le32(0x00000000);
4040 break;
4041 }
4042
4043 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4044 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
4045
4046 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
4047 buffer[count++] = cpu_to_le32(0);
4048}
4049
4050static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
4051{
4052 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4053 AMD_PG_SUPPORT_GFX_SMG |
4054 AMD_PG_SUPPORT_GFX_DMG |
4055 AMD_PG_SUPPORT_CP |
4056 AMD_PG_SUPPORT_GDS |
4057 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4058 gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
4059 gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
4060 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4061 gfx_v7_0_init_gfx_cgpg(adev);
4062 gfx_v7_0_enable_cp_pg(adev, true);
4063 gfx_v7_0_enable_gds_pg(adev, true);
4064 }
4065 gfx_v7_0_init_ao_cu_mask(adev);
4066 gfx_v7_0_update_gfx_pg(adev, true);
4067 }
4068}
4069
4070static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
4071{
4072 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4073 AMD_PG_SUPPORT_GFX_SMG |
4074 AMD_PG_SUPPORT_GFX_DMG |
4075 AMD_PG_SUPPORT_CP |
4076 AMD_PG_SUPPORT_GDS |
4077 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4078 gfx_v7_0_update_gfx_pg(adev, false);
4079 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4080 gfx_v7_0_enable_cp_pg(adev, false);
4081 gfx_v7_0_enable_gds_pg(adev, false);
4082 }
4083 }
4084}
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4095{
4096 uint64_t clock;
4097
4098 mutex_lock(&adev->gfx.gpu_clock_mutex);
4099 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4100 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4101 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4102 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4103 return clock;
4104}
4105
4106static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4107 uint32_t vmid,
4108 uint32_t gds_base, uint32_t gds_size,
4109 uint32_t gws_base, uint32_t gws_size,
4110 uint32_t oa_base, uint32_t oa_size)
4111{
4112
4113 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4114 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4115 WRITE_DATA_DST_SEL(0)));
4116 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4117 amdgpu_ring_write(ring, 0);
4118 amdgpu_ring_write(ring, gds_base);
4119
4120
4121 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4122 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4123 WRITE_DATA_DST_SEL(0)));
4124 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4125 amdgpu_ring_write(ring, 0);
4126 amdgpu_ring_write(ring, gds_size);
4127
4128
4129 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4130 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4131 WRITE_DATA_DST_SEL(0)));
4132 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4133 amdgpu_ring_write(ring, 0);
4134 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4135
4136
4137 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4138 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4139 WRITE_DATA_DST_SEL(0)));
4140 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4141 amdgpu_ring_write(ring, 0);
4142 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4143}
4144
4145static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4146{
4147 struct amdgpu_device *adev = ring->adev;
4148 uint32_t value = 0;
4149
4150 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4151 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4152 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4153 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4154 WREG32(mmSQ_CMD, value);
4155}
4156
4157static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
4158{
4159 WREG32(mmSQ_IND_INDEX,
4160 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4161 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4162 (address << SQ_IND_INDEX__INDEX__SHIFT) |
4163 (SQ_IND_INDEX__FORCE_READ_MASK));
4164 return RREG32(mmSQ_IND_DATA);
4165}
4166
4167static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
4168 uint32_t wave, uint32_t thread,
4169 uint32_t regno, uint32_t num, uint32_t *out)
4170{
4171 WREG32(mmSQ_IND_INDEX,
4172 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4173 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4174 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
4175 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
4176 (SQ_IND_INDEX__FORCE_READ_MASK) |
4177 (SQ_IND_INDEX__AUTO_INCR_MASK));
4178 while (num--)
4179 *(out++) = RREG32(mmSQ_IND_DATA);
4180}
4181
4182static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
4183{
4184
4185 dst[(*no_fields)++] = 0;
4186 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
4187 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
4188 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
4189 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
4190 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
4191 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
4192 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
4193 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
4194 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
4195 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
4196 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
4197 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
4198 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
4199 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
4200 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
4201 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
4202 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
4203 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
4204}
4205
/**
 * gfx_v7_0_read_wave_sgprs - read a range of SGPRs of one wave
 * @adev: amdgpu device pointer
 * @simd: SIMD id of the wave
 * @wave: wave id
 * @start: first SGPR, relative to SQIND_WAVE_SGPRS_OFFSET
 * @size: number of dwords to read
 * @dst: destination for @size dwords
 *
 * Thin wrapper around wave_read_regs() with thread id 0.
 */
static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
4214
4215static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
4216 u32 me, u32 pipe, u32 q, u32 vm)
4217{
4218 cik_srbm_select(adev, me, pipe, q, vm);
4219}
4220
4221static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
4222 .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
4223 .select_se_sh = &gfx_v7_0_select_se_sh,
4224 .read_wave_data = &gfx_v7_0_read_wave_data,
4225 .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
4226 .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
4227};
4228
4229static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
4230 .is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
4231 .set_safe_mode = gfx_v7_0_set_safe_mode,
4232 .unset_safe_mode = gfx_v7_0_unset_safe_mode,
4233 .init = gfx_v7_0_rlc_init,
4234 .get_csb_size = gfx_v7_0_get_csb_size,
4235 .get_csb_buffer = gfx_v7_0_get_csb_buffer,
4236 .get_cp_table_num = gfx_v7_0_cp_pg_table_num,
4237 .resume = gfx_v7_0_rlc_resume,
4238 .stop = gfx_v7_0_rlc_stop,
4239 .reset = gfx_v7_0_rlc_reset,
4240 .start = gfx_v7_0_rlc_start,
4241 .update_spm_vmid = gfx_v7_0_update_spm_vmid
4242};
4243
4244static int gfx_v7_0_early_init(void *handle)
4245{
4246 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4247
4248 adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
4249 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4250 adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
4251 adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
4252 gfx_v7_0_set_ring_funcs(adev);
4253 gfx_v7_0_set_irq_funcs(adev);
4254 gfx_v7_0_set_gds_init(adev);
4255
4256 return 0;
4257}
4258
4259static int gfx_v7_0_late_init(void *handle)
4260{
4261 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4262 int r;
4263
4264 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4265 if (r)
4266 return r;
4267
4268 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4269 if (r)
4270 return r;
4271
4272 return 0;
4273}
4274
4275static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4276{
4277 u32 gb_addr_config;
4278 u32 mc_arb_ramcfg;
4279 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
4280 u32 tmp;
4281
4282 switch (adev->asic_type) {
4283 case CHIP_BONAIRE:
4284 adev->gfx.config.max_shader_engines = 2;
4285 adev->gfx.config.max_tile_pipes = 4;
4286 adev->gfx.config.max_cu_per_sh = 7;
4287 adev->gfx.config.max_sh_per_se = 1;
4288 adev->gfx.config.max_backends_per_se = 2;
4289 adev->gfx.config.max_texture_channel_caches = 4;
4290 adev->gfx.config.max_gprs = 256;
4291 adev->gfx.config.max_gs_threads = 32;
4292 adev->gfx.config.max_hw_contexts = 8;
4293
4294 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4295 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4296 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4297 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4298 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4299 break;
4300 case CHIP_HAWAII:
4301 adev->gfx.config.max_shader_engines = 4;
4302 adev->gfx.config.max_tile_pipes = 16;
4303 adev->gfx.config.max_cu_per_sh = 11;
4304 adev->gfx.config.max_sh_per_se = 1;
4305 adev->gfx.config.max_backends_per_se = 4;
4306 adev->gfx.config.max_texture_channel_caches = 16;
4307 adev->gfx.config.max_gprs = 256;
4308 adev->gfx.config.max_gs_threads = 32;
4309 adev->gfx.config.max_hw_contexts = 8;
4310
4311 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4312 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4313 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4314 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4315 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
4316 break;
4317 case CHIP_KAVERI:
4318 adev->gfx.config.max_shader_engines = 1;
4319 adev->gfx.config.max_tile_pipes = 4;
4320 adev->gfx.config.max_cu_per_sh = 8;
4321 adev->gfx.config.max_backends_per_se = 2;
4322 adev->gfx.config.max_sh_per_se = 1;
4323 adev->gfx.config.max_texture_channel_caches = 4;
4324 adev->gfx.config.max_gprs = 256;
4325 adev->gfx.config.max_gs_threads = 16;
4326 adev->gfx.config.max_hw_contexts = 8;
4327
4328 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4329 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4330 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4331 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4332 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4333 break;
4334 case CHIP_KABINI:
4335 case CHIP_MULLINS:
4336 default:
4337 adev->gfx.config.max_shader_engines = 1;
4338 adev->gfx.config.max_tile_pipes = 2;
4339 adev->gfx.config.max_cu_per_sh = 2;
4340 adev->gfx.config.max_sh_per_se = 1;
4341 adev->gfx.config.max_backends_per_se = 1;
4342 adev->gfx.config.max_texture_channel_caches = 2;
4343 adev->gfx.config.max_gprs = 256;
4344 adev->gfx.config.max_gs_threads = 16;
4345 adev->gfx.config.max_hw_contexts = 8;
4346
4347 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4348 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4349 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4350 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4351 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4352 break;
4353 }
4354
4355 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
4356 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
4357
4358 adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
4359 MC_ARB_RAMCFG, NOOFBANK);
4360 adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
4361 MC_ARB_RAMCFG, NOOFRANKS);
4362
4363 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
4364 adev->gfx.config.mem_max_burst_length_bytes = 256;
4365 if (adev->flags & AMD_IS_APU) {
4366
4367 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
4368 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4369 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4370
4371 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
4372 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4373 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4374
4375
4376 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
4377 dimm00_addr_map = 0;
4378 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
4379 dimm01_addr_map = 0;
4380 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
4381 dimm10_addr_map = 0;
4382 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
4383 dimm11_addr_map = 0;
4384
4385
4386
4387 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
4388 adev->gfx.config.mem_row_size_in_kb = 2;
4389 else
4390 adev->gfx.config.mem_row_size_in_kb = 1;
4391 } else {
4392 tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
4393 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
4394 if (adev->gfx.config.mem_row_size_in_kb > 4)
4395 adev->gfx.config.mem_row_size_in_kb = 4;
4396 }
4397
4398 adev->gfx.config.shader_engine_tile_size = 32;
4399 adev->gfx.config.num_gpus = 1;
4400 adev->gfx.config.multi_gpu_tile_size = 64;
4401
4402
4403 gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
4404 switch (adev->gfx.config.mem_row_size_in_kb) {
4405 case 1:
4406 default:
4407 gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4408 break;
4409 case 2:
4410 gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4411 break;
4412 case 4:
4413 gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4414 break;
4415 }
4416 adev->gfx.config.gb_addr_config = gb_addr_config;
4417}
4418
4419static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
4420 int mec, int pipe, int queue)
4421{
4422 int r;
4423 unsigned irq_type;
4424 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
4425
4426
4427 ring->me = mec + 1;
4428 ring->pipe = pipe;
4429 ring->queue = queue;
4430
4431 ring->ring_obj = NULL;
4432 ring->use_doorbell = true;
4433 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
4434 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
4435
4436 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4437 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
4438 + ring->pipe;
4439
4440
4441 r = amdgpu_ring_init(adev, ring, 1024,
4442 &adev->gfx.eop_irq, irq_type);
4443 if (r)
4444 return r;
4445
4446
4447 return 0;
4448}
4449
4450static int gfx_v7_0_sw_init(void *handle)
4451{
4452 struct amdgpu_ring *ring;
4453 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4454 int i, j, k, r, ring_id;
4455
4456 switch (adev->asic_type) {
4457 case CHIP_KAVERI:
4458 adev->gfx.mec.num_mec = 2;
4459 break;
4460 case CHIP_BONAIRE:
4461 case CHIP_HAWAII:
4462 case CHIP_KABINI:
4463 case CHIP_MULLINS:
4464 default:
4465 adev->gfx.mec.num_mec = 1;
4466 break;
4467 }
4468 adev->gfx.mec.num_pipe_per_mec = 4;
4469 adev->gfx.mec.num_queue_per_pipe = 8;
4470
4471
4472 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
4473 if (r)
4474 return r;
4475
4476
4477 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
4478 &adev->gfx.priv_reg_irq);
4479 if (r)
4480 return r;
4481
4482
4483 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
4484 &adev->gfx.priv_inst_irq);
4485 if (r)
4486 return r;
4487
4488 gfx_v7_0_scratch_init(adev);
4489
4490 r = gfx_v7_0_init_microcode(adev);
4491 if (r) {
4492 DRM_ERROR("Failed to load gfx firmware!\n");
4493 return r;
4494 }
4495
4496 r = adev->gfx.rlc.funcs->init(adev);
4497 if (r) {
4498 DRM_ERROR("Failed to init rlc BOs!\n");
4499 return r;
4500 }
4501
4502
4503 r = gfx_v7_0_mec_init(adev);
4504 if (r) {
4505 DRM_ERROR("Failed to init MEC BOs!\n");
4506 return r;
4507 }
4508
4509 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4510 ring = &adev->gfx.gfx_ring[i];
4511 ring->ring_obj = NULL;
4512 sprintf(ring->name, "gfx");
4513 r = amdgpu_ring_init(adev, ring, 1024,
4514 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
4515 if (r)
4516 return r;
4517 }
4518
4519
4520 ring_id = 0;
4521 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4522 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4523 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4524 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
4525 continue;
4526
4527 r = gfx_v7_0_compute_ring_init(adev,
4528 ring_id,
4529 i, k, j);
4530 if (r)
4531 return r;
4532
4533 ring_id++;
4534 }
4535 }
4536 }
4537
4538 adev->gfx.ce_ram_size = 0x8000;
4539
4540 gfx_v7_0_gpu_early_init(adev);
4541
4542 return r;
4543}
4544
4545static int gfx_v7_0_sw_fini(void *handle)
4546{
4547 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4548 int i;
4549
4550 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4551 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
4552 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4553 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
4554
4555 gfx_v7_0_cp_compute_fini(adev);
4556 amdgpu_gfx_rlc_fini(adev);
4557 gfx_v7_0_mec_fini(adev);
4558 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
4559 &adev->gfx.rlc.clear_state_gpu_addr,
4560 (void **)&adev->gfx.rlc.cs_ptr);
4561 if (adev->gfx.rlc.cp_table_size) {
4562 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
4563 &adev->gfx.rlc.cp_table_gpu_addr,
4564 (void **)&adev->gfx.rlc.cp_table_ptr);
4565 }
4566 gfx_v7_0_free_microcode(adev);
4567
4568 return 0;
4569}
4570
4571static int gfx_v7_0_hw_init(void *handle)
4572{
4573 int r;
4574 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4575
4576 gfx_v7_0_constants_init(adev);
4577
4578
4579 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
4580
4581 r = adev->gfx.rlc.funcs->resume(adev);
4582 if (r)
4583 return r;
4584
4585 r = gfx_v7_0_cp_resume(adev);
4586 if (r)
4587 return r;
4588
4589 return r;
4590}
4591
4592static int gfx_v7_0_hw_fini(void *handle)
4593{
4594 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4595
4596 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4597 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4598 gfx_v7_0_cp_enable(adev, false);
4599 adev->gfx.rlc.funcs->stop(adev);
4600 gfx_v7_0_fini_pg(adev);
4601
4602 return 0;
4603}
4604
/* Suspend is the same as a hardware teardown: defer to gfx_v7_0_hw_fini(). */
static int gfx_v7_0_suspend(void *handle)
{
	return gfx_v7_0_hw_fini(handle);
}
4611
/* Resume is the same as a hardware bring-up: defer to gfx_v7_0_hw_init(). */
static int gfx_v7_0_resume(void *handle)
{
	return gfx_v7_0_hw_init(handle);
}
4618
4619static bool gfx_v7_0_is_idle(void *handle)
4620{
4621 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4622
4623 if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4624 return false;
4625 else
4626 return true;
4627}
4628
4629static int gfx_v7_0_wait_for_idle(void *handle)
4630{
4631 unsigned i;
4632 u32 tmp;
4633 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4634
4635 for (i = 0; i < adev->usec_timeout; i++) {
4636
4637 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4638
4639 if (!tmp)
4640 return 0;
4641 udelay(1);
4642 }
4643 return -ETIMEDOUT;
4644}
4645
4646static int gfx_v7_0_soft_reset(void *handle)
4647{
4648 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4649 u32 tmp;
4650 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4651
4652
4653 tmp = RREG32(mmGRBM_STATUS);
4654 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4655 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4656 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4657 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4658 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4659 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
4660 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
4661 GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
4662
4663 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4664 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
4665 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
4666 }
4667
4668
4669 tmp = RREG32(mmGRBM_STATUS2);
4670 if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
4671 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
4672
4673
4674 tmp = RREG32(mmSRBM_STATUS);
4675 if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
4676 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
4677
4678 if (grbm_soft_reset || srbm_soft_reset) {
4679
4680 gfx_v7_0_fini_pg(adev);
4681 gfx_v7_0_update_cg(adev, false);
4682
4683
4684 adev->gfx.rlc.funcs->stop(adev);
4685
4686
4687 WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
4688
4689
4690 WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
4691
4692 if (grbm_soft_reset) {
4693 tmp = RREG32(mmGRBM_SOFT_RESET);
4694 tmp |= grbm_soft_reset;
4695 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4696 WREG32(mmGRBM_SOFT_RESET, tmp);
4697 tmp = RREG32(mmGRBM_SOFT_RESET);
4698
4699 udelay(50);
4700
4701 tmp &= ~grbm_soft_reset;
4702 WREG32(mmGRBM_SOFT_RESET, tmp);
4703 tmp = RREG32(mmGRBM_SOFT_RESET);
4704 }
4705
4706 if (srbm_soft_reset) {
4707 tmp = RREG32(mmSRBM_SOFT_RESET);
4708 tmp |= srbm_soft_reset;
4709 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4710 WREG32(mmSRBM_SOFT_RESET, tmp);
4711 tmp = RREG32(mmSRBM_SOFT_RESET);
4712
4713 udelay(50);
4714
4715 tmp &= ~srbm_soft_reset;
4716 WREG32(mmSRBM_SOFT_RESET, tmp);
4717 tmp = RREG32(mmSRBM_SOFT_RESET);
4718 }
4719
4720 udelay(50);
4721 }
4722 return 0;
4723}
4724
4725static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4726 enum amdgpu_interrupt_state state)
4727{
4728 u32 cp_int_cntl;
4729
4730 switch (state) {
4731 case AMDGPU_IRQ_STATE_DISABLE:
4732 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4733 cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4734 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4735 break;
4736 case AMDGPU_IRQ_STATE_ENABLE:
4737 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4738 cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4739 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4740 break;
4741 default:
4742 break;
4743 }
4744}
4745
4746static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4747 int me, int pipe,
4748 enum amdgpu_interrupt_state state)
4749{
4750 u32 mec_int_cntl, mec_int_cntl_reg;
4751
4752
4753
4754
4755
4756
4757
4758 if (me == 1) {
4759 switch (pipe) {
4760 case 0:
4761 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4762 break;
4763 case 1:
4764 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
4765 break;
4766 case 2:
4767 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
4768 break;
4769 case 3:
4770 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
4771 break;
4772 default:
4773 DRM_DEBUG("invalid pipe %d\n", pipe);
4774 return;
4775 }
4776 } else {
4777 DRM_DEBUG("invalid me %d\n", me);
4778 return;
4779 }
4780
4781 switch (state) {
4782 case AMDGPU_IRQ_STATE_DISABLE:
4783 mec_int_cntl = RREG32(mec_int_cntl_reg);
4784 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4785 WREG32(mec_int_cntl_reg, mec_int_cntl);
4786 break;
4787 case AMDGPU_IRQ_STATE_ENABLE:
4788 mec_int_cntl = RREG32(mec_int_cntl_reg);
4789 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4790 WREG32(mec_int_cntl_reg, mec_int_cntl);
4791 break;
4792 default:
4793 break;
4794 }
4795}
4796
4797static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4798 struct amdgpu_irq_src *src,
4799 unsigned type,
4800 enum amdgpu_interrupt_state state)
4801{
4802 u32 cp_int_cntl;
4803
4804 switch (state) {
4805 case AMDGPU_IRQ_STATE_DISABLE:
4806 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4807 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4808 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4809 break;
4810 case AMDGPU_IRQ_STATE_ENABLE:
4811 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4812 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4813 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4814 break;
4815 default:
4816 break;
4817 }
4818
4819 return 0;
4820}
4821
4822static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4823 struct amdgpu_irq_src *src,
4824 unsigned type,
4825 enum amdgpu_interrupt_state state)
4826{
4827 u32 cp_int_cntl;
4828
4829 switch (state) {
4830 case AMDGPU_IRQ_STATE_DISABLE:
4831 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4832 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4833 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4834 break;
4835 case AMDGPU_IRQ_STATE_ENABLE:
4836 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4837 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4838 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4839 break;
4840 default:
4841 break;
4842 }
4843
4844 return 0;
4845}
4846
4847static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4848 struct amdgpu_irq_src *src,
4849 unsigned type,
4850 enum amdgpu_interrupt_state state)
4851{
4852 switch (type) {
4853 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
4854 gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
4855 break;
4856 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4857 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4858 break;
4859 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4860 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4861 break;
4862 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4863 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4864 break;
4865 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4866 gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4867 break;
4868 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4869 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4870 break;
4871 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4872 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4873 break;
4874 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4875 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4876 break;
4877 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4878 gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4879 break;
4880 default:
4881 break;
4882 }
4883 return 0;
4884}
4885
4886static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
4887 struct amdgpu_irq_src *source,
4888 struct amdgpu_iv_entry *entry)
4889{
4890 u8 me_id, pipe_id;
4891 struct amdgpu_ring *ring;
4892 int i;
4893
4894 DRM_DEBUG("IH: CP EOP\n");
4895 me_id = (entry->ring_id & 0x0c) >> 2;
4896 pipe_id = (entry->ring_id & 0x03) >> 0;
4897 switch (me_id) {
4898 case 0:
4899 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4900 break;
4901 case 1:
4902 case 2:
4903 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4904 ring = &adev->gfx.compute_ring[i];
4905 if ((ring->me == me_id) && (ring->pipe == pipe_id))
4906 amdgpu_fence_process(ring);
4907 }
4908 break;
4909 }
4910 return 0;
4911}
4912
4913static void gfx_v7_0_fault(struct amdgpu_device *adev,
4914 struct amdgpu_iv_entry *entry)
4915{
4916 struct amdgpu_ring *ring;
4917 u8 me_id, pipe_id;
4918 int i;
4919
4920 me_id = (entry->ring_id & 0x0c) >> 2;
4921 pipe_id = (entry->ring_id & 0x03) >> 0;
4922 switch (me_id) {
4923 case 0:
4924 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
4925 break;
4926 case 1:
4927 case 2:
4928 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4929 ring = &adev->gfx.compute_ring[i];
4930 if ((ring->me == me_id) && (ring->pipe == pipe_id))
4931 drm_sched_fault(&ring->sched);
4932 }
4933 break;
4934 }
4935}
4936
/*
 * gfx_v7_0_priv_reg_irq - .process callback for privileged register faults
 *
 * Logs the error and forwards @entry to gfx_v7_0_fault().  @source is not
 * used.  Always returns 0.
 */
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v7_0_fault(adev, entry);

	return 0;
}
4945
/*
 * gfx_v7_0_priv_inst_irq - .process callback for privileged instruction
 * faults
 *
 * Logs the error and forwards @entry to gfx_v7_0_fault().  @source is not
 * used.  Always returns 0.
 */
static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v7_0_fault(adev, entry);

	return 0;
}
4955
4956static int gfx_v7_0_set_clockgating_state(void *handle,
4957 enum amd_clockgating_state state)
4958{
4959 bool gate = false;
4960 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4961
4962 if (state == AMD_CG_STATE_GATE)
4963 gate = true;
4964
4965 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4966
4967 if (gate) {
4968 gfx_v7_0_enable_mgcg(adev, true);
4969 gfx_v7_0_enable_cgcg(adev, true);
4970 } else {
4971 gfx_v7_0_enable_cgcg(adev, false);
4972 gfx_v7_0_enable_mgcg(adev, false);
4973 }
4974 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4975
4976 return 0;
4977}
4978
4979static int gfx_v7_0_set_powergating_state(void *handle,
4980 enum amd_powergating_state state)
4981{
4982 bool gate = false;
4983 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4984
4985 if (state == AMD_PG_STATE_GATE)
4986 gate = true;
4987
4988 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4989 AMD_PG_SUPPORT_GFX_SMG |
4990 AMD_PG_SUPPORT_GFX_DMG |
4991 AMD_PG_SUPPORT_CP |
4992 AMD_PG_SUPPORT_GDS |
4993 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4994 gfx_v7_0_update_gfx_pg(adev, gate);
4995 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4996 gfx_v7_0_enable_cp_pg(adev, gate);
4997 gfx_v7_0_enable_gds_pg(adev, gate);
4998 }
4999 }
5000
5001 return 0;
5002}
5003
5004static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
5005 .name = "gfx_v7_0",
5006 .early_init = gfx_v7_0_early_init,
5007 .late_init = gfx_v7_0_late_init,
5008 .sw_init = gfx_v7_0_sw_init,
5009 .sw_fini = gfx_v7_0_sw_fini,
5010 .hw_init = gfx_v7_0_hw_init,
5011 .hw_fini = gfx_v7_0_hw_fini,
5012 .suspend = gfx_v7_0_suspend,
5013 .resume = gfx_v7_0_resume,
5014 .is_idle = gfx_v7_0_is_idle,
5015 .wait_for_idle = gfx_v7_0_wait_for_idle,
5016 .soft_reset = gfx_v7_0_soft_reset,
5017 .set_clockgating_state = gfx_v7_0_set_clockgating_state,
5018 .set_powergating_state = gfx_v7_0_set_powergating_state,
5019};
5020
5021static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5022 .type = AMDGPU_RING_TYPE_GFX,
5023 .align_mask = 0xff,
5024 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5025 .support_64bit_ptrs = false,
5026 .get_rptr = gfx_v7_0_ring_get_rptr,
5027 .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
5028 .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
5029 .emit_frame_size =
5030 20 +
5031 7 +
5032 5 +
5033 12 + 12 + 12 +
5034 7 + 4 +
5035 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 +
5036 3 + 4,
5037 .emit_ib_size = 4,
5038 .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
5039 .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
5040 .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
5041 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5042 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5043 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5044 .test_ring = gfx_v7_0_ring_test_ring,
5045 .test_ib = gfx_v7_0_ring_test_ib,
5046 .insert_nop = amdgpu_ring_insert_nop,
5047 .pad_ib = amdgpu_ring_generic_pad_ib,
5048 .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
5049 .emit_wreg = gfx_v7_0_ring_emit_wreg,
5050 .soft_recovery = gfx_v7_0_ring_soft_recovery,
5051};
5052
5053static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5054 .type = AMDGPU_RING_TYPE_COMPUTE,
5055 .align_mask = 0xff,
5056 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5057 .support_64bit_ptrs = false,
5058 .get_rptr = gfx_v7_0_ring_get_rptr,
5059 .get_wptr = gfx_v7_0_ring_get_wptr_compute,
5060 .set_wptr = gfx_v7_0_ring_set_wptr_compute,
5061 .emit_frame_size =
5062 20 +
5063 7 +
5064 5 +
5065 7 +
5066 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 +
5067 7 + 7 + 7,
5068 .emit_ib_size = 7,
5069 .emit_ib = gfx_v7_0_ring_emit_ib_compute,
5070 .emit_fence = gfx_v7_0_ring_emit_fence_compute,
5071 .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
5072 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5073 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5074 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5075 .test_ring = gfx_v7_0_ring_test_ring,
5076 .test_ib = gfx_v7_0_ring_test_ib,
5077 .insert_nop = amdgpu_ring_insert_nop,
5078 .pad_ib = amdgpu_ring_generic_pad_ib,
5079 .emit_wreg = gfx_v7_0_ring_emit_wreg,
5080};
5081
5082static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
5083{
5084 int i;
5085
5086 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5087 adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
5088 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5089 adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
5090}
5091
5092static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
5093 .set = gfx_v7_0_set_eop_interrupt_state,
5094 .process = gfx_v7_0_eop_irq,
5095};
5096
5097static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
5098 .set = gfx_v7_0_set_priv_reg_fault_state,
5099 .process = gfx_v7_0_priv_reg_irq,
5100};
5101
5102static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
5103 .set = gfx_v7_0_set_priv_inst_fault_state,
5104 .process = gfx_v7_0_priv_inst_irq,
5105};
5106
5107static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
5108{
5109 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5110 adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
5111
5112 adev->gfx.priv_reg_irq.num_types = 1;
5113 adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
5114
5115 adev->gfx.priv_inst_irq.num_types = 1;
5116 adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
5117}
5118
5119static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5120{
5121
5122 adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
5123 adev->gds.gws_size = 64;
5124 adev->gds.oa_size = 16;
5125 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
5126}
5127
5128
5129static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
5130{
5131 int i, j, k, counter, active_cu_number = 0;
5132 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5133 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
5134 unsigned disable_masks[4 * 2];
5135 u32 ao_cu_num;
5136
5137 if (adev->flags & AMD_IS_APU)
5138 ao_cu_num = 2;
5139 else
5140 ao_cu_num = adev->gfx.config.max_cu_per_sh;
5141
5142 memset(cu_info, 0, sizeof(*cu_info));
5143
5144 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5145
5146 mutex_lock(&adev->grbm_idx_mutex);
5147 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5148 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5149 mask = 1;
5150 ao_bitmap = 0;
5151 counter = 0;
5152 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
5153 if (i < 4 && j < 2)
5154 gfx_v7_0_set_user_cu_inactive_bitmap(
5155 adev, disable_masks[i * 2 + j]);
5156 bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
5157 cu_info->bitmap[i][j] = bitmap;
5158
5159 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5160 if (bitmap & mask) {
5161 if (counter < ao_cu_num)
5162 ao_bitmap |= mask;
5163 counter ++;
5164 }
5165 mask <<= 1;
5166 }
5167 active_cu_number += counter;
5168 if (i < 2 && j < 2)
5169 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5170 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5171 }
5172 }
5173 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5174 mutex_unlock(&adev->grbm_idx_mutex);
5175
5176 cu_info->number = active_cu_number;
5177 cu_info->ao_cu_mask = ao_cu_mask;
5178 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5179 cu_info->max_waves_per_simd = 10;
5180 cu_info->max_scratch_slots_per_cu = 32;
5181 cu_info->wave_front_size = 64;
5182 cu_info->lds_size = 64;
5183}
5184
5185const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
5186{
5187 .type = AMD_IP_BLOCK_TYPE_GFX,
5188 .major = 7,
5189 .minor = 0,
5190 .rev = 0,
5191 .funcs = &gfx_v7_0_ip_funcs,
5192};
5193
5194const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
5195{
5196 .type = AMD_IP_BLOCK_TYPE_GFX,
5197 .major = 7,
5198 .minor = 1,
5199 .rev = 0,
5200 .funcs = &gfx_v7_0_ip_funcs,
5201};
5202
5203const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
5204{
5205 .type = AMD_IP_BLOCK_TYPE_GFX,
5206 .major = 7,
5207 .minor = 2,
5208 .rev = 0,
5209 .funcs = &gfx_v7_0_ip_funcs,
5210};
5211
5212const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
5213{
5214 .type = AMD_IP_BLOCK_TYPE_GFX,
5215 .major = 7,
5216 .minor = 3,
5217 .rev = 0,
5218 .funcs = &gfx_v7_0_ip_funcs,
5219};
5220