1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/firmware.h>
24#include <drm/drmP.h>
25#include "amdgpu.h"
26#include "amdgpu_ih.h"
27#include "amdgpu_gfx.h"
28#include "cikd.h"
29#include "cik.h"
30#include "cik_structs.h"
31#include "atom.h"
32#include "amdgpu_ucode.h"
33#include "clearstate_ci.h"
34
35#include "dce/dce_8_0_d.h"
36#include "dce/dce_8_0_sh_mask.h"
37
38#include "bif/bif_4_1_d.h"
39#include "bif/bif_4_1_sh_mask.h"
40
41#include "gca/gfx_7_0_d.h"
42#include "gca/gfx_7_2_enum.h"
43#include "gca/gfx_7_2_sh_mask.h"
44
45#include "gmc/gmc_7_0_d.h"
46#include "gmc/gmc_7_0_sh_mask.h"
47
48#include "oss/oss_2_0_d.h"
49#include "oss/oss_2_0_sh_mask.h"
50
51#define NUM_SIMD_PER_CU 0x4
52
53#define GFX7_NUM_GFX_RINGS 1
54#define GFX7_MEC_HPD_SIZE 2048
55
56static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
57static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
58static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
59
60MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
61MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
62MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
63MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
64MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");
65
66MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
67MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
68MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
69MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
70MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");
71
72MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
73MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
74MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
75MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
76MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
77MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");
78
79MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
80MODULE_FIRMWARE("amdgpu/kabini_me.bin");
81MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
82MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
83MODULE_FIRMWARE("amdgpu/kabini_mec.bin");
84
85MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
86MODULE_FIRMWARE("amdgpu/mullins_me.bin");
87MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
88MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
89MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
90
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15).  Each entry
 * holds the BASE/SIZE pair plus the GWS and OA registers for that VMID,
 * so GDS partitioning code can program any VMID by table lookup.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
110
/*
 * RLC save/restore register list for "Spectre" (Kaveri) GFX7 parts.
 * The table is handed to the RLC save/restore machinery: most entries
 * come in pairs of ((GRBM SE/SH select << 16) | (register dword offset))
 * followed by a 0x00000000 placeholder word, while the bare small
 * values (0x3, 0x5) introduce the trailing sub-lists of consecutive
 * entries.  NOTE(review): layout description inferred from the table's
 * structure; confirm against the CIK RLC ucode interface before relying
 * on it.  Do not edit values by hand -- they come from hardware docs.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
557
/*
 * RLC save/restore register list for "Kalindi" (Kabini/Mullins) GFX7
 * parts.  Same encoding as the Spectre list above but covering the
 * smaller APU configuration (fewer SE/SH broadcast variants per
 * register).  NOTE(review): layout description inferred from the
 * table's structure; confirm against the CIK RLC ucode interface.
 * Do not edit values by hand -- they come from hardware docs.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
882
883static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
884static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
885static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
886static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
887
888
889
890
891
892
893
894
895
896
897
898
899
900static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
901{
902 const char *chip_name;
903 char fw_name[30];
904 int err;
905
906 DRM_DEBUG("\n");
907
908 switch (adev->asic_type) {
909 case CHIP_BONAIRE:
910 chip_name = "bonaire";
911 break;
912 case CHIP_HAWAII:
913 chip_name = "hawaii";
914 break;
915 case CHIP_KAVERI:
916 chip_name = "kaveri";
917 break;
918 case CHIP_KABINI:
919 chip_name = "kabini";
920 break;
921 case CHIP_MULLINS:
922 chip_name = "mullins";
923 break;
924 default: BUG();
925 }
926
927 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
928 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
929 if (err)
930 goto out;
931 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
932 if (err)
933 goto out;
934
935 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
936 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
937 if (err)
938 goto out;
939 err = amdgpu_ucode_validate(adev->gfx.me_fw);
940 if (err)
941 goto out;
942
943 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
944 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
945 if (err)
946 goto out;
947 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
948 if (err)
949 goto out;
950
951 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
952 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
953 if (err)
954 goto out;
955 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
956 if (err)
957 goto out;
958
959 if (adev->asic_type == CHIP_KAVERI) {
960 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
961 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
962 if (err)
963 goto out;
964 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
965 if (err)
966 goto out;
967 }
968
969 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
970 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
971 if (err)
972 goto out;
973 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
974
975out:
976 if (err) {
977 pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
978 release_firmware(adev->gfx.pfp_fw);
979 adev->gfx.pfp_fw = NULL;
980 release_firmware(adev->gfx.me_fw);
981 adev->gfx.me_fw = NULL;
982 release_firmware(adev->gfx.ce_fw);
983 adev->gfx.ce_fw = NULL;
984 release_firmware(adev->gfx.mec_fw);
985 adev->gfx.mec_fw = NULL;
986 release_firmware(adev->gfx.mec2_fw);
987 adev->gfx.mec2_fw = NULL;
988 release_firmware(adev->gfx.rlc_fw);
989 adev->gfx.rlc_fw = NULL;
990 }
991 return err;
992}
993
994static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
995{
996 release_firmware(adev->gfx.pfp_fw);
997 adev->gfx.pfp_fw = NULL;
998 release_firmware(adev->gfx.me_fw);
999 adev->gfx.me_fw = NULL;
1000 release_firmware(adev->gfx.ce_fw);
1001 adev->gfx.ce_fw = NULL;
1002 release_firmware(adev->gfx.mec_fw);
1003 adev->gfx.mec_fw = NULL;
1004 release_firmware(adev->gfx.mec2_fw);
1005 adev->gfx.mec2_fw = NULL;
1006 release_firmware(adev->gfx.rlc_fw);
1007 adev->gfx.rlc_fw = NULL;
1008}
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1022{
1023 const u32 num_tile_mode_states =
1024 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1025 const u32 num_secondary_tile_mode_states =
1026 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1027 u32 reg_offset, split_equal_to_row_size;
1028 uint32_t *tile, *macrotile;
1029
1030 tile = adev->gfx.config.tile_mode_array;
1031 macrotile = adev->gfx.config.macrotile_mode_array;
1032
1033 switch (adev->gfx.config.mem_row_size_in_kb) {
1034 case 1:
1035 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1036 break;
1037 case 2:
1038 default:
1039 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1040 break;
1041 case 4:
1042 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1043 break;
1044 }
1045
1046 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1047 tile[reg_offset] = 0;
1048 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1049 macrotile[reg_offset] = 0;
1050
1051 switch (adev->asic_type) {
1052 case CHIP_BONAIRE:
1053 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1054 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1055 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1056 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1057 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1058 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1061 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1062 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1063 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1064 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1065 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1066 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1067 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1068 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1069 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1070 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1071 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1072 TILE_SPLIT(split_equal_to_row_size));
1073 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1074 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1075 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1076 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1077 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1078 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1079 TILE_SPLIT(split_equal_to_row_size));
1080 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1081 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1082 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1083 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1084 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1085 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1086 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1087 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1088 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1090 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1091 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1092 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1094 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1095 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1096 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1097 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1098 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1099 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1100 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1102 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1103 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1104 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1106 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1107 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1108 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1110 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1111 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1112 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1113 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1115 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1116 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1117 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1118 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1119 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1120 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1122 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1123 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1124 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1126 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1127 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1128 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1130 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1131 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1132 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1133 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1135 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1136 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1137 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1139 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1140 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1141 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1143 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1144 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1145 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1146 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1147 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1148 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1150 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1151 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1152 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1154 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1155
1156 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1159 NUM_BANKS(ADDR_SURF_16_BANK));
1160 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1163 NUM_BANKS(ADDR_SURF_16_BANK));
1164 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1167 NUM_BANKS(ADDR_SURF_16_BANK));
1168 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1171 NUM_BANKS(ADDR_SURF_16_BANK));
1172 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1175 NUM_BANKS(ADDR_SURF_16_BANK));
1176 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1179 NUM_BANKS(ADDR_SURF_8_BANK));
1180 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1183 NUM_BANKS(ADDR_SURF_4_BANK));
1184 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1187 NUM_BANKS(ADDR_SURF_16_BANK));
1188 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1191 NUM_BANKS(ADDR_SURF_16_BANK));
1192 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1195 NUM_BANKS(ADDR_SURF_16_BANK));
1196 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1199 NUM_BANKS(ADDR_SURF_16_BANK));
1200 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1203 NUM_BANKS(ADDR_SURF_16_BANK));
1204 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1207 NUM_BANKS(ADDR_SURF_8_BANK));
1208 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1211 NUM_BANKS(ADDR_SURF_4_BANK));
1212
1213 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1214 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1215 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1216 if (reg_offset != 7)
1217 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1218 break;
1219 case CHIP_HAWAII:
1220 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1221 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1224 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1225 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1228 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1229 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1232 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1233 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1236 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1237 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1238 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1239 TILE_SPLIT(split_equal_to_row_size));
1240 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1241 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1242 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1243 TILE_SPLIT(split_equal_to_row_size));
1244 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1245 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1246 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1247 TILE_SPLIT(split_equal_to_row_size));
1248 tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1249 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1250 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1251 TILE_SPLIT(split_equal_to_row_size));
1252 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1253 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1254 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1256 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1257 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1258 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1259 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1261 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1262 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1263 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1265 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1266 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1267 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1269 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1270 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1271 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1272 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1274 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1276 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1277 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1278 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1280 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1281 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1282 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1284 tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1285 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1288 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1289 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1290 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1292 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1293 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1295 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1296 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1297 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1299 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1300 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1303 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1304 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1305 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1307 tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1308 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1309 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1311 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1312 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1313 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1315 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1316 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1317 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1319 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1320 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1321 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1323 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1325 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1326 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1328 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1330 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1332 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1334 tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1335 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1336 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1338
1339 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1342 NUM_BANKS(ADDR_SURF_16_BANK));
1343 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1346 NUM_BANKS(ADDR_SURF_16_BANK));
1347 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1350 NUM_BANKS(ADDR_SURF_16_BANK));
1351 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1354 NUM_BANKS(ADDR_SURF_16_BANK));
1355 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1358 NUM_BANKS(ADDR_SURF_8_BANK));
1359 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1362 NUM_BANKS(ADDR_SURF_4_BANK));
1363 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1366 NUM_BANKS(ADDR_SURF_4_BANK));
1367 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1370 NUM_BANKS(ADDR_SURF_16_BANK));
1371 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1374 NUM_BANKS(ADDR_SURF_16_BANK));
1375 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1378 NUM_BANKS(ADDR_SURF_16_BANK));
1379 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1382 NUM_BANKS(ADDR_SURF_8_BANK));
1383 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1386 NUM_BANKS(ADDR_SURF_16_BANK));
1387 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1390 NUM_BANKS(ADDR_SURF_8_BANK));
1391 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1394 NUM_BANKS(ADDR_SURF_4_BANK));
1395
1396 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1397 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1398 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1399 if (reg_offset != 7)
1400 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1401 break;
1402 case CHIP_KABINI:
1403 case CHIP_KAVERI:
1404 case CHIP_MULLINS:
1405 default:
1406 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1407 PIPE_CONFIG(ADDR_SURF_P2) |
1408 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1409 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1410 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1411 PIPE_CONFIG(ADDR_SURF_P2) |
1412 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1413 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1414 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1415 PIPE_CONFIG(ADDR_SURF_P2) |
1416 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1417 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1418 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1419 PIPE_CONFIG(ADDR_SURF_P2) |
1420 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1421 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1422 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1423 PIPE_CONFIG(ADDR_SURF_P2) |
1424 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1425 TILE_SPLIT(split_equal_to_row_size));
1426 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1427 PIPE_CONFIG(ADDR_SURF_P2) |
1428 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1429 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1430 PIPE_CONFIG(ADDR_SURF_P2) |
1431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1432 TILE_SPLIT(split_equal_to_row_size));
1433 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1434 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1435 PIPE_CONFIG(ADDR_SURF_P2));
1436 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1437 PIPE_CONFIG(ADDR_SURF_P2) |
1438 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1439 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1440 PIPE_CONFIG(ADDR_SURF_P2) |
1441 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1443 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1444 PIPE_CONFIG(ADDR_SURF_P2) |
1445 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1447 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1448 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1449 PIPE_CONFIG(ADDR_SURF_P2) |
1450 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1451 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1452 PIPE_CONFIG(ADDR_SURF_P2) |
1453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1455 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1456 PIPE_CONFIG(ADDR_SURF_P2) |
1457 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1459 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1460 PIPE_CONFIG(ADDR_SURF_P2) |
1461 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1463 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1464 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1465 PIPE_CONFIG(ADDR_SURF_P2) |
1466 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1468 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1469 PIPE_CONFIG(ADDR_SURF_P2) |
1470 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1471 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1472 PIPE_CONFIG(ADDR_SURF_P2) |
1473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1475 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1476 PIPE_CONFIG(ADDR_SURF_P2) |
1477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1479 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1480 PIPE_CONFIG(ADDR_SURF_P2) |
1481 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1483 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1484 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1485 PIPE_CONFIG(ADDR_SURF_P2) |
1486 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1488 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1489 PIPE_CONFIG(ADDR_SURF_P2) |
1490 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1492 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1493 PIPE_CONFIG(ADDR_SURF_P2) |
1494 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1496 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1497 PIPE_CONFIG(ADDR_SURF_P2) |
1498 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1499 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1500 PIPE_CONFIG(ADDR_SURF_P2) |
1501 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1503 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1504 PIPE_CONFIG(ADDR_SURF_P2) |
1505 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1507 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1508
1509 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1512 NUM_BANKS(ADDR_SURF_8_BANK));
1513 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1516 NUM_BANKS(ADDR_SURF_8_BANK));
1517 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1520 NUM_BANKS(ADDR_SURF_8_BANK));
1521 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1524 NUM_BANKS(ADDR_SURF_8_BANK));
1525 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1528 NUM_BANKS(ADDR_SURF_8_BANK));
1529 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1532 NUM_BANKS(ADDR_SURF_8_BANK));
1533 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1536 NUM_BANKS(ADDR_SURF_8_BANK));
1537 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1540 NUM_BANKS(ADDR_SURF_16_BANK));
1541 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1544 NUM_BANKS(ADDR_SURF_16_BANK));
1545 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1548 NUM_BANKS(ADDR_SURF_16_BANK));
1549 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1552 NUM_BANKS(ADDR_SURF_16_BANK));
1553 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1556 NUM_BANKS(ADDR_SURF_16_BANK));
1557 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1559 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1560 NUM_BANKS(ADDR_SURF_16_BANK));
1561 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1564 NUM_BANKS(ADDR_SURF_8_BANK));
1565
1566 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1567 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1568 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1569 if (reg_offset != 7)
1570 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1571 break;
1572 }
1573}
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
1587 u32 se_num, u32 sh_num, u32 instance)
1588{
1589 u32 data;
1590
1591 if (instance == 0xffffffff)
1592 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1593 else
1594 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1595
1596 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1597 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1598 GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1599 else if (se_num == 0xffffffff)
1600 data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1601 (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1602 else if (sh_num == 0xffffffff)
1603 data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1604 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1605 else
1606 data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1607 (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1608 WREG32(mmGRBM_GFX_INDEX, data);
1609}
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1620{
1621 u32 data, mask;
1622
1623 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1624 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1625
1626 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1627 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1628
1629 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1630 adev->gfx.config.max_sh_per_se);
1631
1632 return (~data) & mask;
1633}
1634
1635static void
1636gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
1637{
1638 switch (adev->asic_type) {
1639 case CHIP_BONAIRE:
1640 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
1641 SE_XSEL(1) | SE_YSEL(1);
1642 *rconf1 |= 0x0;
1643 break;
1644 case CHIP_HAWAII:
1645 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
1646 RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
1647 PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
1648 SE_YSEL(3);
1649 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
1650 SE_PAIR_YSEL(2);
1651 break;
1652 case CHIP_KAVERI:
1653 *rconf |= RB_MAP_PKR0(2);
1654 *rconf1 |= 0x0;
1655 break;
1656 case CHIP_KABINI:
1657 case CHIP_MULLINS:
1658 *rconf |= 0x0;
1659 *rconf1 |= 0x0;
1660 break;
1661 default:
1662 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
1663 break;
1664 }
1665}
1666
/*
 * gfx_v7_0_write_harvested_raster_configs - per-SE raster config programming
 *
 * @adev: amdgpu device pointer
 * @raster_config: baseline PA_SC_RASTER_CONFIG value for the whole chip
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of render backends (RBs) that are actually enabled
 * @num_rb: nominal RB count used to size the per-SE slices
 *
 * When some RBs have been harvested, the global raster config is no longer
 * valid for every shader engine.  This walks each SE and patches the
 * SE_PAIR_MAP / SE_MAP / PKR_MAP / RB_MAP_PKR* fields so that work is only
 * routed to live RBs, then writes the patched value per SE.
 * NOTE(review): the field-patching order and mask arithmetic mirror the
 * harvested-config algorithm shared with the radeon driver — confirm against
 * the CIK register spec before restructuring.
 */
static void
gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice the global RB mask into one mask per shader engine */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* the remapping below only handles these topologies */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if an entire SE pair is dead, point SE_PAIR_MAP at the live pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* index of this SE's pair partner base */

		/* if one SE of a pair is dead, remap SE_MAP to the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* likewise remap PKR_MAP when one packer has no live RBs */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* remap RB_MAP_PKR0 if one of packer 0's RBs is dead */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* remap RB_MAP_PKR1 for packer 1's RB pair */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* write this SE's patched config (broadcast across its SHs) */
		gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* restore broadcast so later register writes reach all SEs/SHs */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
1786{
1787 int i, j;
1788 u32 data;
1789 u32 raster_config = 0, raster_config_1 = 0;
1790 u32 active_rbs = 0;
1791 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1792 adev->gfx.config.max_sh_per_se;
1793 unsigned num_rb_pipes;
1794
1795 mutex_lock(&adev->grbm_idx_mutex);
1796 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1797 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1798 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1799 data = gfx_v7_0_get_rb_active_bitmap(adev);
1800 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1801 rb_bitmap_width_per_sh);
1802 }
1803 }
1804 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1805
1806 adev->gfx.config.backend_enable_mask = active_rbs;
1807 adev->gfx.config.num_rbs = hweight32(active_rbs);
1808
1809 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
1810 adev->gfx.config.max_shader_engines, 16);
1811
1812 gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
1813
1814 if (!adev->gfx.config.backend_enable_mask ||
1815 adev->gfx.config.num_rbs >= num_rb_pipes) {
1816 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
1817 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
1818 } else {
1819 gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
1820 adev->gfx.config.backend_enable_mask,
1821 num_rb_pipes);
1822 }
1823
1824
1825 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1826 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1827 gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
1828 adev->gfx.config.rb_config[i][j].rb_backend_disable =
1829 RREG32(mmCC_RB_BACKEND_DISABLE);
1830 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
1831 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1832 adev->gfx.config.rb_config[i][j].raster_config =
1833 RREG32(mmPA_SC_RASTER_CONFIG);
1834 adev->gfx.config.rb_config[i][j].raster_config_1 =
1835 RREG32(mmPA_SC_RASTER_CONFIG_1);
1836 }
1837 }
1838 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1839 mutex_unlock(&adev->grbm_idx_mutex);
1840}
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850#define DEFAULT_SH_MEM_BASES (0x6000)
1851#define FIRST_COMPUTE_VMID (8)
1852#define LAST_COMPUTE_VMID (16)
1853static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
1854{
1855 int i;
1856 uint32_t sh_mem_config;
1857 uint32_t sh_mem_bases;
1858
1859
1860
1861
1862
1863
1864
1865 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1866 sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1867 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1868 sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
1869 mutex_lock(&adev->srbm_mutex);
1870 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1871 cik_srbm_select(adev, 0, 0, 0, i);
1872
1873 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
1874 WREG32(mmSH_MEM_APE1_BASE, 1);
1875 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1876 WREG32(mmSH_MEM_BASES, sh_mem_bases);
1877 }
1878 cik_srbm_select(adev, 0, 0, 0, 0);
1879 mutex_unlock(&adev->srbm_mutex);
1880}
1881
/* Set driver-side gfx config defaults that are not probed from hardware. */
static void gfx_v7_0_config_init(struct amdgpu_device *adev)
{
	/* enable the double off-chip LDS buffer layout on this GFX IP */
	adev->gfx.config.double_offchip_lds_buf = 1;
}
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
1896{
1897 u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
1898 u32 tmp;
1899 int i;
1900
1901 WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
1902
1903 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
1904 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
1905 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
1906
1907 gfx_v7_0_tiling_mode_table_init(adev);
1908
1909 gfx_v7_0_setup_rb(adev);
1910 gfx_v7_0_get_cu_info(adev);
1911 gfx_v7_0_config_init(adev);
1912
1913
1914 WREG32(mmCP_MEQ_THRESHOLDS,
1915 (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
1916 (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
1917
1918 mutex_lock(&adev->grbm_idx_mutex);
1919
1920
1921
1922
1923 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1924
1925
1926
1927 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1928 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1929 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
1930 MTYPE_NC);
1931 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
1932 MTYPE_UC);
1933 sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);
1934
1935 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
1936 SWIZZLE_ENABLE, 1);
1937 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1938 ELEMENT_SIZE, 1);
1939 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1940 INDEX_STRIDE, 3);
1941 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
1942
1943 mutex_lock(&adev->srbm_mutex);
1944 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
1945 if (i == 0)
1946 sh_mem_base = 0;
1947 else
1948 sh_mem_base = adev->gmc.shared_aperture_start >> 48;
1949 cik_srbm_select(adev, 0, 0, 0, i);
1950
1951 WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
1952 WREG32(mmSH_MEM_APE1_BASE, 1);
1953 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1954 WREG32(mmSH_MEM_BASES, sh_mem_base);
1955 }
1956 cik_srbm_select(adev, 0, 0, 0, 0);
1957 mutex_unlock(&adev->srbm_mutex);
1958
1959 gfx_v7_0_init_compute_vmid(adev);
1960
1961 WREG32(mmSX_DEBUG_1, 0x20);
1962
1963 WREG32(mmTA_CNTL_AUX, 0x00010000);
1964
1965 tmp = RREG32(mmSPI_CONFIG_CNTL);
1966 tmp |= 0x03000000;
1967 WREG32(mmSPI_CONFIG_CNTL, tmp);
1968
1969 WREG32(mmSQ_CONFIG, 1);
1970
1971 WREG32(mmDB_DEBUG, 0);
1972
1973 tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
1974 tmp |= 0x00000400;
1975 WREG32(mmDB_DEBUG2, tmp);
1976
1977 tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
1978 tmp |= 0x00020200;
1979 WREG32(mmDB_DEBUG3, tmp);
1980
1981 tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
1982 tmp |= 0x00018208;
1983 WREG32(mmCB_HW_CONTROL, tmp);
1984
1985 WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
1986
1987 WREG32(mmPA_SC_FIFO_SIZE,
1988 ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1989 (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1990 (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1991 (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
1992
1993 WREG32(mmVGT_NUM_INSTANCES, 1);
1994
1995 WREG32(mmCP_PERFMON_CNTL, 0);
1996
1997 WREG32(mmSQ_CONFIG, 0);
1998
1999 WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
2000 ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
2001 (255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
2002
2003 WREG32(mmVGT_CACHE_INVALIDATION,
2004 (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
2005 (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
2006
2007 WREG32(mmVGT_GS_VERTEX_REUSE, 16);
2008 WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
2009
2010 WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
2011 (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
2012 WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
2013
2014 tmp = RREG32(mmSPI_ARB_PRIORITY);
2015 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
2016 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
2017 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
2018 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
2019 WREG32(mmSPI_ARB_PRIORITY, tmp);
2020
2021 mutex_unlock(&adev->grbm_idx_mutex);
2022
2023 udelay(50);
2024}
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
2040{
2041 adev->gfx.scratch.num_reg = 8;
2042 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
2043 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
2044}
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
/**
 * gfx_v7_0_ring_test_ring - basic ring sanity test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Allocates a scratch register, writes 0xCAFEDEAD to it, then emits a
 * SET_UCONFIG_REG packet on the ring that stores 0xDEADBEEF to the same
 * register and polls until the CP has executed the packet.
 * Returns 0 on success, -ETIMEDOUT if the CP never processes the write,
 * or a negative error code from scratch/ring allocation.
 */
static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	/* ask the CP to write 0xDEADBEEF into the scratch register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* poll the scratch register until the CP has executed the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
2092
2093
2094
2095
2096
2097
2098
2099
2100
/**
 * gfx_v7_0_ring_emit_hdp_flush - emit an hdp flush on the cp
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Emits a WAIT_REG_MEM packet (write/wait/write mode) against
 * GPU_HDP_FLUSH_REQ / GPU_HDP_FLUSH_DONE to flush the HDP cache.
 * The ref/mask bit selects the requesting CP: CP0 for gfx, or a
 * per-pipe bit in the CP2/CP6 groups for compute ME1/ME2.
 */
static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask;
	/* compute rings wait on the ME engine, gfx waits on the PFP */
	int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no flush bit defined for other MEs */
			return;
		}
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(usepfp)));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
2131
/* Emit a VS_PARTIAL_FLUSH event followed by a VGT_FLUSH event to drain
 * the VGT before state that affects it is reprogrammed.
 */
static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
/**
 * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * @ring: amdgpu_ring structure holding ring information
 * @addr: GPU address the sequence number is written to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Emits an EVENT_WRITE_EOP packet that flushes GPU caches and writes
 * the fence sequence number to memory.
 */
static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems: first send a dummy EOP
	 * event down the pipe with seq one below the real value.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
/**
 * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring
 *
 * @ring: amdgpu_ring structure holding ring information
 * @addr: GPU address the sequence number is written to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Emits a RELEASE_MEM packet that flushes GPU caches and writes the
 * fence sequence number to memory (compute rings use RELEASE_MEM
 * instead of the gfx EVENT_WRITE_EOP packet).
 */
static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
/**
 * gfx_v7_0_ring_emit_ib_gfx - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @ring: amdgpu_ring structure holding ring information
 * @job: job the IB belongs to (supplies the VMID), may be NULL
 * @ib: amdgpu indirect buffer object
 * @ctx_switch: true if this IB follows a context switch
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for CE IBs)
 * packet pointing at the IB so the CP fetches and executes it.
 */
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_job *job,
				      struct amdgpu_ib *ib,
				      bool ctx_switch)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB size in dwords plus the VMID it executes under */
	control |= ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
2258
/**
 * gfx_v7_0_ring_emit_ib_compute - emit an IB on a compute ring
 *
 * @ring: amdgpu_ring structure holding ring information
 * @job: job the IB belongs to (supplies the VMID), may be NULL
 * @ib: amdgpu indirect buffer object
 * @ctx_switch: unused on compute rings
 *
 * Emits an INDIRECT_BUFFER packet for the compute MEC to execute.
 */
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  bool ctx_switch)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
2276
/* Emit a CONTEXT_CONTROL packet; on a context switch also flush the
 * VGT and request reload of the relevant state groups.
 */
static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v7_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs */
		dw2 |= 0x10002;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
/**
 * gfx_v7_0_ring_test_ib - test an IB on the ring
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: fence wait timeout (jiffies)
 *
 * Builds a small IB that writes 0xDEADBEEF to a scratch register,
 * schedules it, waits for its fence, and checks the register value
 * to verify IB execution works.
 * Returns 0 on success, -ETIMEDOUT if the fence never signals,
 * -EINVAL if the write did not land, or another negative error code.
 */
static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r)
		goto err1;

	/* IB body: one SET_UCONFIG_REG write to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2387{
2388 int i;
2389
2390 if (enable) {
2391 WREG32(mmCP_ME_CNTL, 0);
2392 } else {
2393 WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
2394 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2395 adev->gfx.gfx_ring[i].sched.ready = false;
2396 }
2397 udelay(50);
2398}
2399
2400
2401
2402
2403
2404
2405
2406
2407
/**
 * gfx_v7_0_cp_gfx_load_microcode - load the gfx CP microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the PFP, CE and ME firmware headers, records their versions,
 * halts the gfx CP, and writes each firmware image word by word into
 * the corresponding ucode RAM.
 * Returns 0 on success, -EINVAL if any firmware image is missing.
 */
static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
	adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
	adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
	adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
	adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);

	/* halt the CP before touching its ucode RAM */
	gfx_v7_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
/**
 * gfx_v7_0_cp_gfx_start - start the gfx ring
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the gfx CP and emits the initial clear-state packet stream
 * (CE partition bases, preamble, context registers from the clear
 * state table, raster config, CLEAR_STATE) on the gfx ring.
 * Returns 0 on success, or a negative error code if the ring could
 * not be locked.
 */
static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v7_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions: CE ram size is on the order of 32K */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	/* clear state buffer preamble */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent of the RLC clear state table */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
						  PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* program the raster config registers for SE0/SH0 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, 0x00000316);
	amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	amdgpu_ring_commit(ring);

	return 0;
}
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
/**
 * gfx_v7_0_cp_gfx_resume - set up and start the gfx ring
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the CP_RB0 ring buffer registers (size, read/write pointers,
 * rptr writeback address, base address), starts the ring via
 * gfx_v7_0_cp_gfx_start(), and runs a ring test.
 * Returns 0 on success, or a negative error code from the ring test.
 */
static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
	if (adev->asic_type != CHIP_HAWAII)
		WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	WREG32(mmSCRATCH_ADDR, 0);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(mmSCRATCH_UMSK, 0);

	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	gfx_v7_0_cp_gfx_start(adev);
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	return 0;
}
2611
2612static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
2613{
2614 return ring->adev->wb.wb[ring->rptr_offs];
2615}
2616
/* Read the gfx ring write pointer directly from the CP_RB0_WPTR register. */
static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	/* RREG32() references this local */
	struct amdgpu_device *adev = ring->adev;

	return RREG32(mmCP_RB0_WPTR);
}
2623
/* Publish the gfx ring write pointer to CP_RB0_WPTR; the read-back
 * forces the register write to post before returning.
 */
static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	(void)RREG32(mmCP_RB0_WPTR);
}
2631
2632static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
2633{
2634
2635 return ring->adev->wb.wb[ring->wptr_offs];
2636}
2637
2638static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
2639{
2640 struct amdgpu_device *adev = ring->adev;
2641
2642
2643 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
2644 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2645}
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2656{
2657 int i;
2658
2659 if (enable) {
2660 WREG32(mmCP_MEC_CNTL, 0);
2661 } else {
2662 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2663 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2664 adev->gfx.compute_ring[i].sched.ready = false;
2665 }
2666 udelay(50);
2667}
2668
2669
2670
2671
2672
2673
2674
2675
2676
/**
 * gfx_v7_0_cp_compute_load_microcode - load the compute CP microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the MEC firmware header, halts the compute CP, and writes the
 * MEC (and on Kaveri also the MEC2) firmware image into ucode RAM.
 * Returns 0 on success, -EINVAL if a required firmware image is missing.
 */
static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
	adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(
					mec_hdr->ucode_feature_version);

	/* halt the MECs before touching their ucode RAM */
	gfx_v7_0_cp_compute_enable(adev, false);

	/* MEC1 */
	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);

	if (adev->asic_type == CHIP_KAVERI) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		if (!adev->gfx.mec2_fw)
			return -EINVAL;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
		adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version = le32_to_cpu(
				mec2_hdr->ucode_feature_version);

		/* MEC2 */
		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
2739{
2740 int i;
2741
2742 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2743 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2744
2745 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
2746 }
2747}
2748
/* Free the MEC HPD/EOP buffer object allocated by gfx_v7_0_mec_init(). */
static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
2753
/**
 * gfx_v7_0_mec_init - allocate the MEC HPD/EOP buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Claims the compute queues owned by amdgpu and allocates one zeroed
 * VRAM buffer covering the EOP space for every MEC pipe (even pipes
 * not owned by amdgpu), pinned and mapped for CPU access.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	/* allocate space for ALL pipes (even the ones we don't own) */
	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
		* GFX7_MEC_HPD_SIZE * 2;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
		gfx_v7_0_mec_fini(adev);
		return r;
	}

	/* clear memory. Not sure the hw requires this but just in case. */
	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
2788
/* Snapshot of the per-queue CP_MQD_* / CP_HQD_* register block.
 * NOTE(review): this struct is not referenced in this part of the file;
 * field order appears to follow the register layout — verify against
 * the gca register headers before relying on it.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
2827
/* Program the per-pipe EOP buffer address, VMID and EOP size for one
 * MEC pipe, selecting it via SRBM first (mec is 0-based here; the SRBM
 * ME index is mec + 1 because ME0 is the gfx CP).
 */
static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
				       int mec, int pipe)
{
	u64 eop_gpu_addr;
	u32 tmp;
	size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
			    * GFX7_MEC_HPD_SIZE * 2;

	mutex_lock(&adev->srbm_mutex);
	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;

	cik_srbm_select(adev, mec + 1, pipe, 0, 0);

	/* write the EOP addr */
	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(mmCP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
	WREG32(mmCP_HPD_EOP_CONTROL, tmp);

	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2857
/* Deactivate the currently SRBM-selected HQD: request a dequeue, wait
 * for CP_HQD_ACTIVE to clear, then reset the queue pointers.
 * Returns 0 on success or -ETIMEDOUT if the queue never drains.
 * Caller must hold srbm_mutex with the target queue selected.
 */
static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
{
	int i;

	/* disable the queue if it's still active */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (i == adev->usec_timeout)
			return -ETIMEDOUT;

		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(mmCP_HQD_PQ_RPTR, 0);
		WREG32(mmCP_HQD_PQ_WPTR, 0);
	}

	return 0;
}
2881
/* Fill in a cik_mqd (memory queue descriptor) for @ring.
 * The MQD mirrors the CP_MQD_*/CP_HQD_* registers that
 * gfx_v7_0_mqd_commit() later writes to hardware.  Caller must hold
 * srbm_mutex with the target queue selected, since several fields are
 * seeded from the currently selected HQD registers.
 */
static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
			      struct cik_mqd *mqd,
			      uint64_t mqd_gpu_addr,
			      struct amdgpu_ring *ring)
{
	u64 hqd_gpu_addr;
	u64 wb_gpu_addr;

	/* init the mqd struct */
	memset(mqd, 0, sizeof(struct cik_mqd));

	mqd->header = 0xC0310800;
	/* enable all SEs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;

	/* enable doorbell? */
	mqd->cp_hqd_pq_doorbell_control =
		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
	if (ring->use_doorbell)
		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
	else
		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;

	/* set the pointer to the MQD itself */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* set MQD vmid to 0 */
	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
	mqd->cp_hqd_pq_control &=
		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);

	mqd->cp_hqd_pq_control |=
		order_base_2(ring->ring_size / 8);
	mqd->cp_hqd_pq_control |=
		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
	mqd->cp_hqd_pq_control |=
		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
#endif
	mqd->cp_hqd_pq_control &=
		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
	mqd->cp_hqd_pq_control |=
		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */

	/* only used if CP_PQ_WPTR_POLL_CNTL.EN is set */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set the wb address wether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		mqd->cp_hqd_pq_doorbell_control =
			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		mqd->cp_hqd_pq_doorbell_control &=
			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
		mqd->cp_hqd_pq_doorbell_control |=
			(ring->doorbell_index <<
			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
		mqd->cp_hqd_pq_doorbell_control |=
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
		mqd->cp_hqd_pq_doorbell_control &=
			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);

	} else {
		mqd->cp_hqd_pq_doorbell_control = 0;
	}

	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	/* defaults: seed the remaining fields from the current HQD state */
	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);

	/* activate the queue */
	mqd->cp_hqd_active = 1;
}
3001
/* Write a prepared MQD out to the currently SRBM-selected hardware
 * queue.  The cik_mqd fields from cp_mqd_base_addr_lo onward mirror the
 * register block starting at mmCP_MQD_BASE_ADDR, so the MQD can be
 * copied register by register.  Caller must hold srbm_mutex with the
 * target queue selected.  Always returns 0.
 */
int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
{
	uint32_t tmp;
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
3026
/* Allocate, initialize and commit the MQD for one compute ring:
 * creates the MQD buffer object in GTT, selects the ring's queue via
 * SRBM, fills the MQD, deactivates any stale HQD state and writes the
 * new MQD to hardware.  Returns 0 on success or a negative error code.
 */
static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
{
	int r;
	u64 mqd_gpu_addr;
	struct cik_mqd *mqd;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				      &mqd_gpu_addr, (void **)&mqd);
	if (r) {
		dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
		return r;
	}

	mutex_lock(&adev->srbm_mutex);
	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
	gfx_v7_0_mqd_deactivate(adev);
	gfx_v7_0_mqd_commit(adev, mqd);

	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	amdgpu_bo_kunmap(ring->mqd_obj);
	amdgpu_bo_unreserve(ring->mqd_obj);
	return 0;
}
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
/**
 * gfx_v7_0_cp_compute_resume - set up and start the compute queues
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes every MEC pipe, builds and commits an MQD for each
 * compute ring, enables the MECs, and runs a ring test on each ring
 * (the helper updates each ring's ready state; its result is not
 * treated as fatal here).
 * Returns 0 on success or a negative error code from queue init.
 */
static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	struct amdgpu_ring *ring;

	/* fix up chicken bits */
	tmp = RREG32(mmCP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(mmCP_CPF_DEBUG, tmp);

	/* init all pipes (even the ones we don't own) */
	for (i = 0; i < adev->gfx.mec.num_mec; i++)
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
			gfx_v7_0_compute_pipe_init(adev, i, j);

	/* init the queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		r = gfx_v7_0_compute_queue_init(adev, i);
		if (r) {
			gfx_v7_0_cp_compute_fini(adev);
			return r;
		}
	}

	gfx_v7_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
3100
/* Enable or disable both the gfx and compute command processors. */
static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v7_0_cp_gfx_enable(adev, enable);
	gfx_v7_0_cp_compute_enable(adev, enable);
}
3106
/* Load both CP firmware sets: the gfx PFP/ME/CE images first, then the
 * compute MEC images.  Returns 0 on success or the first error.
 */
static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
{
	int err;

	err = gfx_v7_0_cp_gfx_load_microcode(adev);
	if (!err)
		err = gfx_v7_0_cp_compute_load_microcode(adev);

	return err;
}
3120
3121static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3122 bool enable)
3123{
3124 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3125
3126 if (enable)
3127 tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3128 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3129 else
3130 tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3131 CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3132 WREG32(mmCP_INT_CNTL_RING0, tmp);
3133}
3134
/* Bring up both command processors: with GUI idle interrupts masked,
 * load the CP microcode, resume the gfx ring, then the compute queues,
 * and finally re-enable the interrupts.
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v7_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v7_0_cp_load_microcode(adev);
	if (r)
		return r;

	r = gfx_v7_0_cp_gfx_resume(adev);
	if (r)
		return r;
	r = gfx_v7_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v7_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
3156
3157
3158
3159
3160
3161
3162
3163
3164
/*
 * gfx_v7_0_ring_emit_pipeline_sync - emit a PM4 pipeline sync
 *
 * Emits a WAIT_REG_MEM packet that busy-waits on the ring's fence
 * address in memory until it reaches the latest emitted fence sequence
 * number, i.e. until all previously submitted work has signalled.
 */
static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	/* wait on the PFP for gfx rings, on the ME otherwise */
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* == */
				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);		/* reference value */
	amdgpu_ring_write(ring, 0xffffffff);	/* compare mask */
	amdgpu_ring_write(ring, 4);		/* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE from running ahead */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
/*
 * gfx_v7_0_ring_emit_vm_flush - emit a GPUVM TLB flush on the ring
 * @ring: ring to emit on
 * @vmid: VM id whose page tables changed
 * @pd_addr: new page directory address
 *
 * Emits the GMC flush followed by a wait for the invalidate request to
 * be consumed, then (gfx rings only) resynchronizes the PFP/CE.
 */
static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0)));   /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);		/* ref */
	amdgpu_ring_write(ring, 0);		/* mask */
	amdgpu_ring_write(ring, 0x20);		/* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);

		/* sync CE with ME to prevent CE from running ahead */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
3235
/*
 * gfx_v7_0_ring_emit_wreg - emit a PM4 register write
 *
 * Emits a WRITE_DATA packet writing @val to register @reg; the write is
 * issued by the PFP on gfx rings and by the ME on compute rings.
 */
static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
				    uint32_t reg, uint32_t val)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));	/* register */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
3248
3249
3250
3251
3252
3253
/*
 * gfx_v7_0_rlc_init - allocate the RLC (Run List Controller) buffers
 *
 * Selects the per-ASIC save/restore register list (APUs only), then
 * allocates the save/restore buffer, the clear-state buffer (CSB) and
 * the CP jump table used for power gating.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
{
	const u32 *src_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	/* save restore register list is only used on APUs; dGPUs leave
	 * reg_list NULL and skip the SR buffer below */
	if (adev->flags & AMD_IS_APU) {
		if (adev->asic_type == CHIP_KAVERI) {
			adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
			adev->gfx.rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
			adev->gfx.rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	adev->gfx.rlc.cs_data = ci_cs_data;
	/* CP jump tables (5 of them, dword-sized entries, 2K aligned)... */
	adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048);
	/* ...plus 64K of GDS backup space */
	adev->gfx.rlc.cp_table_size += 64 * 1024;

	src_ptr = adev->gfx.rlc.reg_list;
	dws = adev->gfx.rlc.reg_list_size;
	/* extra dwords for list headers/padding on top of the raw list */
	dws += (5 * 16) + 48 + 48 + 64;

	cs_data = adev->gfx.rlc.cs_data;

	if (src_ptr) {
		/* save restore block */
		r = amdgpu_gfx_rlc_init_sr(adev, dws);
		if (r)
			return r;
	}

	if (cs_data) {
		/* clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->gfx.rlc.cp_table_size) {
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	return 0;
}
3305
3306static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3307{
3308 u32 tmp;
3309
3310 tmp = RREG32(mmRLC_LB_CNTL);
3311 if (enable)
3312 tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3313 else
3314 tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3315 WREG32(mmRLC_LB_CNTL, tmp);
3316}
3317
/*
 * gfx_v7_0_wait_for_rlc_serdes - wait for RLC serdes traffic to drain
 *
 * Polls the CU master busy register for every SE/SH (with the broadcast
 * index restored afterwards), then polls the non-CU master busy bits.
 * Each poll is bounded by adev->usec_timeout; timeouts are silent.
 */
static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* GRBM index selection must be serialized against other users */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SEs/SHs) before dropping the lock */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3347
3348static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3349{
3350 u32 tmp;
3351
3352 tmp = RREG32(mmRLC_CNTL);
3353 if (tmp != rlc)
3354 WREG32(mmRLC_CNTL, rlc);
3355}
3356
/*
 * gfx_v7_0_halt_rlc - halt the RLC microengine
 *
 * If the RLC F32 core is running, clears the enable bit, waits (bounded
 * by usec_timeout) for the RLC to go idle and for serdes traffic to
 * drain. Returns the original RLC_CNTL value so the caller can restore
 * it later via gfx_v7_0_update_rlc().
 */
static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
{
	u32 data, orig;

	orig = data = RREG32(mmRLC_CNTL);

	if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
		u32 i;

		data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
		WREG32(mmRLC_CNTL, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
				break;
			udelay(1);
		}

		gfx_v7_0_wait_for_rlc_serdes(adev);
	}

	return orig;
}
3380
/* RLC is always treated as enabled on gfx7 (no runtime check needed). */
static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
{
	return true;
}
3385
/*
 * gfx_v7_0_set_safe_mode - request RLC safe mode entry
 *
 * Writes the safe-mode request to RLC_GPR_REG2, then waits (bounded by
 * usec_timeout) for the GFX power/clock status bits to assert and for
 * the RLC to acknowledge the request by clearing the request bit.
 */
static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
{
	u32 tmp, i, mask;

	/* bit 0 = request, bit 1 presumably selects the ENTER message —
	 * NOTE(review): no named masks exist for RLC_GPR_REG2 here */
	tmp = 0x1 | (1 << 1);
	WREG32(mmRLC_GPR_REG2, tmp);

	mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
		RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear the request bit (ack) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
			break;
		udelay(1);
	}
}
3407
3408static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
3409{
3410 u32 tmp;
3411
3412 tmp = 0x1 | (0 << 1);
3413 WREG32(mmRLC_GPR_REG2, tmp);
3414}
3415
3416
3417
3418
3419
3420
3421
3422
/*
 * gfx_v7_0_rlc_stop - stop the RLC microengine
 *
 * Disables the RLC, masks the GUI idle interrupts it would otherwise
 * raise, and waits for in-flight serdes traffic to drain.
 */
static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32(mmRLC_CNTL, 0);

	gfx_v7_0_enable_gui_idle_interrupt(adev, false);

	gfx_v7_0_wait_for_rlc_serdes(adev);
}
3431
3432
3433
3434
3435
3436
3437
3438
/*
 * gfx_v7_0_rlc_start - start the RLC microengine
 *
 * Enables the RLC F32 core, re-enables the GUI idle interrupts and
 * gives the RLC a short settling delay.
 */
static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);

	gfx_v7_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
3447
/*
 * gfx_v7_0_rlc_reset - pulse the RLC soft reset bit
 *
 * Asserts SOFT_RESET_RLC in GRBM_SOFT_RESET, waits 50us, deasserts it
 * and waits another 50us for the block to come out of reset.
 */
static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
/*
 * gfx_v7_0_rlc_resume - load the RLC microcode and start the RLC
 *
 * Stops and resets the RLC, sets up power gating and load balancing,
 * uploads the RLC ucode from adev->gfx.rlc_fw one dword at a time and
 * finally starts the engine. The statement order follows the hardware
 * programming sequence and must not be rearranged.
 * Returns 0 on success, -EINVAL if no RLC firmware was loaded.
 */
static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	u32 tmp;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);
	adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(
					hdr->ucode_feature_version);

	adev->gfx.rlc.funcs->stop(adev);

	/* disable CGCG/CGLS (bits 1:0) while the RLC is down */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);

	adev->gfx.rlc.funcs->reset(adev);

	gfx_v7_0_init_pg(adev);

	WREG32(mmRLC_LB_CNTR_INIT, 0);
	WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);

	/* load-balancing setup is broadcast to all SEs/SHs */
	mutex_lock(&adev->grbm_idx_mutex);
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(mmRLC_LB_PARAMS, 0x00600408);
	WREG32(mmRLC_LB_CNTL, 0x80000004);
	mutex_unlock(&adev->grbm_idx_mutex);

	WREG32(mmRLC_MC_CNTL, 0);
	WREG32(mmRLC_UCODE_CNTL, 0);

	/* upload the ucode image dword by dword through the data port */
	fw_data = (const __le32 *)
		(adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	/* XXX - find out what chips support lbpw */
	gfx_v7_0_enable_lbpw(adev, false);

	if (adev->asic_type == CHIP_BONAIRE)
		WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
3526
/*
 * gfx_v7_0_enable_cgcg - enable/disable coarse-grain clock gating
 *
 * On enable: halts the RLC, programs the serdes override broadcast to
 * all SEs/SHs, restores the RLC, then sets the CGCG/CGLS enable bits.
 * On disable: masks GUI idle interrupts, performs dummy reads of
 * CB_CGTT_SCLK_CTRL, clears the enable bits and unmasks the interrupts.
 */
static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		gfx_v7_0_enable_gui_idle_interrupt(adev, true);

		/* tmp holds the pre-halt RLC_CNTL value for restore below */
		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
			RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
			RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&adev->grbm_idx_mutex);

		gfx_v7_0_update_rlc(adev, tmp);

		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		if (orig != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

	} else {
		gfx_v7_0_enable_gui_idle_interrupt(adev, false);

		/* dummy reads to flush outstanding clock-gating state */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (orig != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
	}
}
3569
/*
 * gfx_v7_0_enable_mgcg - enable/disable medium-grain clock gating
 *
 * Programs CP/RLC memory light sleep, the MGCG override bits in
 * RLC_CGTT_MGCG_OVERRIDE and the serdes override (broadcast to all
 * SEs/SHs around an RLC halt/restore), plus CGTS shader-memory gating
 * where supported. The disable path reverses each of those settings.
 */
static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (orig != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* tmp holds the pre-halt RLC_CNTL value for restore below */
		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
			RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, data);
		mutex_unlock(&adev->grbm_idx_mutex);

		gfx_v7_0_update_rlc(adev, tmp);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* CGTS shader-memory gating: on-monitor mode 2,
			 * add threshold 0x96, overrides cleared */
			orig = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (orig != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both MGCG override bits on */
		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		if (orig != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		tmp = gfx_v7_0_halt_rlc(adev);

		mutex_lock(&adev->grbm_idx_mutex);
		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
		WREG32(mmRLC_SERDES_WR_CTRL, data);
		mutex_unlock(&adev->grbm_idx_mutex);

		gfx_v7_0_update_rlc(adev, tmp);
	}
}
3654
/*
 * gfx_v7_0_update_cg - update gfx clock gating state
 *
 * Order is deliberate: MGCG before CGCG when enabling, CGCG before
 * MGCG when disabling. GUI idle interrupts are masked for the
 * transition and re-enabled afterwards.
 */
static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
			       bool enable)
{
	gfx_v7_0_enable_gui_idle_interrupt(adev, false);

	if (enable) {
		gfx_v7_0_enable_mgcg(adev, true);
		gfx_v7_0_enable_cgcg(adev, true);
	} else {
		gfx_v7_0_enable_cgcg(adev, false);
		gfx_v7_0_enable_mgcg(adev, false);
	}
	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
}
3669
3670static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
3671 bool enable)
3672{
3673 u32 data, orig;
3674
3675 orig = data = RREG32(mmRLC_PG_CNTL);
3676 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3677 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3678 else
3679 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3680 if (orig != data)
3681 WREG32(mmRLC_PG_CNTL, data);
3682}
3683
3684static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
3685 bool enable)
3686{
3687 u32 data, orig;
3688
3689 orig = data = RREG32(mmRLC_PG_CNTL);
3690 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
3691 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3692 else
3693 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3694 if (orig != data)
3695 WREG32(mmRLC_PG_CNTL, data);
3696}
3697
3698static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
3699{
3700 u32 data, orig;
3701
3702 orig = data = RREG32(mmRLC_PG_CNTL);
3703 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
3704 data &= ~0x8000;
3705 else
3706 data |= 0x8000;
3707 if (orig != data)
3708 WREG32(mmRLC_PG_CNTL, data);
3709}
3710
3711static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
3712{
3713 u32 data, orig;
3714
3715 orig = data = RREG32(mmRLC_PG_CNTL);
3716 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
3717 data &= ~0x2000;
3718 else
3719 data |= 0x2000;
3720 if (orig != data)
3721 WREG32(mmRLC_PG_CNTL, data);
3722}
3723
3724static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
3725{
3726 if (adev->asic_type == CHIP_KAVERI)
3727 return 5;
3728 else
3729 return 4;
3730}
3731
/*
 * gfx_v7_0_enable_gfx_cgpg - enable/disable gfx clock-gated power gating
 *
 * Sets or clears GFX_POWER_GATING_ENABLE in RLC_PG_CNTL and AUTO_PG_EN
 * in RLC_AUTO_PG_CTRL (only writing registers whose value changed).
 */
static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
				     bool enable)
{
	u32 data, orig;

	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(mmRLC_PG_CNTL);
		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
		if (orig != data)
			WREG32(mmRLC_PG_CNTL, data);

		orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
		data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
		if (orig != data)
			WREG32(mmRLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(mmRLC_PG_CNTL);
		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
		if (orig != data)
			WREG32(mmRLC_PG_CNTL, data);

		orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
		data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
		if (orig != data)
			WREG32(mmRLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result of this read is discarded —
		 * presumably a posting read to make the disable take
		 * effect; confirm before removing */
		data = RREG32(mmDB_RENDER_CONTROL);
	}
}
3761
3762static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
3763 u32 bitmap)
3764{
3765 u32 data;
3766
3767 if (!bitmap)
3768 return;
3769
3770 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3771 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3772
3773 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
3774}
3775
3776static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3777{
3778 u32 data, mask;
3779
3780 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
3781 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
3782
3783 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3784 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3785
3786 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
3787
3788 return (~data) & mask;
3789}
3790
/*
 * gfx_v7_0_init_ao_cu_mask - program always-on CU mask and PG CU limit
 *
 * Writes the always-on CU mask gathered during CU info setup and sets
 * the maximum number of powered-up CUs in RLC_MAX_PG_CU.
 */
static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
{
	u32 tmp;

	WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);

	tmp = RREG32(mmRLC_MAX_PG_CU);
	tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
	tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
	WREG32(mmRLC_MAX_PG_CU, tmp);
}
3802
3803static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
3804 bool enable)
3805{
3806 u32 data, orig;
3807
3808 orig = data = RREG32(mmRLC_PG_CNTL);
3809 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
3810 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3811 else
3812 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
3813 if (orig != data)
3814 WREG32(mmRLC_PG_CNTL, data);
3815}
3816
3817static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
3818 bool enable)
3819{
3820 u32 data, orig;
3821
3822 orig = data = RREG32(mmRLC_PG_CNTL);
3823 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
3824 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3825 else
3826 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
3827 if (orig != data)
3828 WREG32(mmRLC_PG_CNTL, data);
3829}
3830
3831#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
3832#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
3833
/*
 * gfx_v7_0_init_gfx_cgpg - set up RLC scratch state for gfx power gating
 *
 * Writes the clear-state descriptor (address + size) and the
 * save/restore register list into RLC GPM scratch memory, points the
 * RLC at the save/restore and CP jump-table buffers, and tunes the PG
 * delay/threshold registers.
 */
static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
{
	u32 data, orig;
	u32 i;

	if (adev->gfx.rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
		WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
		WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
	} else {
		/* no CSB: zero out the descriptor */
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
	}
	if (adev->gfx.rlc.reg_list) {
		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
			WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
	}

	orig = data = RREG32(mmRLC_PG_CNTL);
	data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
	if (orig != data)
		WREG32(mmRLC_PG_CNTL, data);

	/* buffers are 256-byte aligned, hence the >> 8 */
	WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
	WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);

	data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
	data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
	data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
	WREG32(mmCP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(mmRLC_PG_DELAY, data);

	data = RREG32(mmRLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(mmRLC_PG_DELAY_2, data);

	data = RREG32(mmRLC_AUTO_PG_CTRL);
	data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
	data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
	WREG32(mmRLC_AUTO_PG_CTRL, data);

}
3882
/* Apply all gfx power-gating modes (CGPG, static and dynamic MGPG). */
static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
{
	gfx_v7_0_enable_gfx_cgpg(adev, enable);
	gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
	gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
}
3889
/*
 * gfx_v7_0_get_csb_size - size (in dwords) of the clear-state buffer
 *
 * Mirrors the packet stream built by gfx_v7_0_get_csb_buffer(): each
 * count below matches one packet emitted there. Returns 0 when there is
 * no cs_data or when a non-SECT_CONTEXT section is encountered.
 */
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return 0;

	/* begin clear state (PREAMBLE_CNTL) */
	count += 2;
	/* context control */
	count += 3;
	/* one SET_CONTEXT_REG packet per extent: header + offset + regs */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config (SET_CONTEXT_REG, two values) */
	count += 4;
	/* end clear state (PREAMBLE_CNTL) */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
3921
/*
 * gfx_v7_0_get_csb_buffer - build the clear-state indirect buffer
 *
 * Fills @buffer with the PM4 packet stream the RLC replays to restore
 * default context state: clear-state preamble, context control, all
 * SECT_CONTEXT register extents from cs_data, the per-ASIC
 * PA_SC_RASTER_CONFIG values, preamble end and a CLEAR_STATE packet.
 * Must stay in sync with gfx_v7_0_get_csb_size().
 */
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context registers belong in the CSB */
				return;
			}
		}
	}

	/* per-ASIC golden raster config (PA_SC_RASTER_CONFIG[_1]) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
3987
/*
 * gfx_v7_0_init_pg - initialize gfx power gating if any PG feature is on
 *
 * Enables sclk slowdown on power transitions, sets up CGPG state plus
 * CP/GDS power gating where GFX_PG is supported, programs the
 * always-on CU mask and finally turns the PG modes on.
 */
static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
		gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
			gfx_v7_0_init_gfx_cgpg(adev);
			gfx_v7_0_enable_cp_pg(adev, true);
			gfx_v7_0_enable_gds_pg(adev, true);
		}
		gfx_v7_0_init_ao_cu_mask(adev);
		gfx_v7_0_update_gfx_pg(adev, true);
	}
}
4007
/*
 * gfx_v7_0_fini_pg - tear down gfx power gating (reverse of init_pg)
 *
 * Turns off the PG modes first, then disables CP/GDS power gating if
 * GFX_PG was in use.
 */
static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v7_0_update_gfx_pg(adev, false);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
			gfx_v7_0_enable_cp_pg(adev, false);
			gfx_v7_0_enable_gds_pg(adev, false);
		}
	}
}
4023
4024
4025
4026
4027
4028
4029
4030
4031
/*
 * gfx_v7_0_get_gpu_clock_counter - read the 64-bit GPU clock counter
 *
 * Latches the counter via RLC_CAPTURE_GPU_CLOCK_COUNT (serialized by
 * gpu_clock_mutex since the LSB/MSB reads must pair with one capture)
 * and returns the combined 64-bit value.
 */
static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
4043
/*
 * gfx_v7_0_ring_emit_gds_switch - emit GDS partition setup for a VMID
 *
 * Emits four WRITE_DATA packets programming the per-VMID GDS memory
 * base/size, GWS base/size and OA mask registers.
 */
static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask covering [oa_base, oa_base + oa_size) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4082
/*
 * gfx_v7_0_ring_soft_recovery - try to kill the waves of a hung job
 *
 * Issues an SQ_CMD targeting only the waves belonging to @vmid
 * (CHECK_VMID=1) so the hung job's shaders are terminated without
 * resetting the whole GPU.
 */
static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}
4094
4095static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
4096{
4097 WREG32(mmSQ_IND_INDEX,
4098 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4099 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4100 (address << SQ_IND_INDEX__INDEX__SHIFT) |
4101 (SQ_IND_INDEX__FORCE_READ_MASK));
4102 return RREG32(mmSQ_IND_DATA);
4103}
4104
4105static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
4106 uint32_t wave, uint32_t thread,
4107 uint32_t regno, uint32_t num, uint32_t *out)
4108{
4109 WREG32(mmSQ_IND_INDEX,
4110 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
4111 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
4112 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
4113 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
4114 (SQ_IND_INDEX__FORCE_READ_MASK) |
4115 (SQ_IND_INDEX__AUTO_INCR_MASK));
4116 while (num--)
4117 *(out++) = RREG32(mmSQ_IND_DATA);
4118}
4119
/*
 * gfx_v7_0_read_wave_data - dump a wave's status registers for debugfs
 *
 * Fills @dst with a fixed set of SQ_WAVE_* registers and advances
 * @no_fields accordingly. The leading 0 keeps the field layout in sync
 * with other gfx generations that emit a version/header word first.
 */
static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
4143
/* Read @size SGPRs of a wave starting at @start into @dst (thread 0). */
static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
4152
/* Select an ME/pipe/queue via SRBM (VMID fixed at 0) for debug access. */
static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	cik_srbm_select(adev, me, pipe, q, 0);
}
4158
/* gfx7 hooks for the common amdgpu_gfx layer (debugfs/info helpers). */
static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v7_0_select_se_sh,
	.read_wave_data = &gfx_v7_0_read_wave_data,
	.read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
};
4166
/* gfx7 hooks for the common amdgpu RLC helper layer. */
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
	.is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
	.set_safe_mode = gfx_v7_0_set_safe_mode,
	.unset_safe_mode = gfx_v7_0_unset_safe_mode,
	.init = gfx_v7_0_rlc_init,
	.get_csb_size = gfx_v7_0_get_csb_size,
	.get_csb_buffer = gfx_v7_0_get_csb_buffer,
	.get_cp_table_num = gfx_v7_0_cp_pg_table_num,
	.resume = gfx_v7_0_rlc_resume,
	.stop = gfx_v7_0_rlc_stop,
	.reset = gfx_v7_0_rlc_reset,
	.start = gfx_v7_0_rlc_start
};
4180
/*
 * gfx_v7_0_early_init - early IP-block init
 *
 * Sets ring counts and installs the gfx/RLC/ring/irq/GDS function
 * tables before any hardware is touched. Always returns 0.
 */
static int gfx_v7_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
	gfx_v7_0_set_ring_funcs(adev);
	gfx_v7_0_set_irq_funcs(adev);
	gfx_v7_0_set_gds_init(adev);

	return 0;
}
4195
4196static int gfx_v7_0_late_init(void *handle)
4197{
4198 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4199 int r;
4200
4201 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4202 if (r)
4203 return r;
4204
4205 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4206 if (r)
4207 return r;
4208
4209 return 0;
4210}
4211
4212static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4213{
4214 u32 gb_addr_config;
4215 u32 mc_shared_chmap, mc_arb_ramcfg;
4216 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
4217 u32 tmp;
4218
4219 switch (adev->asic_type) {
4220 case CHIP_BONAIRE:
4221 adev->gfx.config.max_shader_engines = 2;
4222 adev->gfx.config.max_tile_pipes = 4;
4223 adev->gfx.config.max_cu_per_sh = 7;
4224 adev->gfx.config.max_sh_per_se = 1;
4225 adev->gfx.config.max_backends_per_se = 2;
4226 adev->gfx.config.max_texture_channel_caches = 4;
4227 adev->gfx.config.max_gprs = 256;
4228 adev->gfx.config.max_gs_threads = 32;
4229 adev->gfx.config.max_hw_contexts = 8;
4230
4231 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4232 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4233 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4234 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4235 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4236 break;
4237 case CHIP_HAWAII:
4238 adev->gfx.config.max_shader_engines = 4;
4239 adev->gfx.config.max_tile_pipes = 16;
4240 adev->gfx.config.max_cu_per_sh = 11;
4241 adev->gfx.config.max_sh_per_se = 1;
4242 adev->gfx.config.max_backends_per_se = 4;
4243 adev->gfx.config.max_texture_channel_caches = 16;
4244 adev->gfx.config.max_gprs = 256;
4245 adev->gfx.config.max_gs_threads = 32;
4246 adev->gfx.config.max_hw_contexts = 8;
4247
4248 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4249 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4250 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4251 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4252 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
4253 break;
4254 case CHIP_KAVERI:
4255 adev->gfx.config.max_shader_engines = 1;
4256 adev->gfx.config.max_tile_pipes = 4;
4257 adev->gfx.config.max_cu_per_sh = 8;
4258 adev->gfx.config.max_backends_per_se = 2;
4259 adev->gfx.config.max_sh_per_se = 1;
4260 adev->gfx.config.max_texture_channel_caches = 4;
4261 adev->gfx.config.max_gprs = 256;
4262 adev->gfx.config.max_gs_threads = 16;
4263 adev->gfx.config.max_hw_contexts = 8;
4264
4265 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4266 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4267 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4268 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4269 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4270 break;
4271 case CHIP_KABINI:
4272 case CHIP_MULLINS:
4273 default:
4274 adev->gfx.config.max_shader_engines = 1;
4275 adev->gfx.config.max_tile_pipes = 2;
4276 adev->gfx.config.max_cu_per_sh = 2;
4277 adev->gfx.config.max_sh_per_se = 1;
4278 adev->gfx.config.max_backends_per_se = 1;
4279 adev->gfx.config.max_texture_channel_caches = 2;
4280 adev->gfx.config.max_gprs = 256;
4281 adev->gfx.config.max_gs_threads = 16;
4282 adev->gfx.config.max_hw_contexts = 8;
4283
4284 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4285 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4286 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4287 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4288 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4289 break;
4290 }
4291
4292 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
4293 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
4294 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
4295
4296 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
4297 adev->gfx.config.mem_max_burst_length_bytes = 256;
4298 if (adev->flags & AMD_IS_APU) {
4299
4300 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
4301 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4302 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4303
4304 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
4305 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4306 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4307
4308
4309 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
4310 dimm00_addr_map = 0;
4311 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
4312 dimm01_addr_map = 0;
4313 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
4314 dimm10_addr_map = 0;
4315 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
4316 dimm11_addr_map = 0;
4317
4318
4319
4320 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
4321 adev->gfx.config.mem_row_size_in_kb = 2;
4322 else
4323 adev->gfx.config.mem_row_size_in_kb = 1;
4324 } else {
4325 tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
4326 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
4327 if (adev->gfx.config.mem_row_size_in_kb > 4)
4328 adev->gfx.config.mem_row_size_in_kb = 4;
4329 }
4330
4331 adev->gfx.config.shader_engine_tile_size = 32;
4332 adev->gfx.config.num_gpus = 1;
4333 adev->gfx.config.multi_gpu_tile_size = 64;
4334
4335
4336 gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
4337 switch (adev->gfx.config.mem_row_size_in_kb) {
4338 case 1:
4339 default:
4340 gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4341 break;
4342 case 2:
4343 gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4344 break;
4345 case 4:
4346 gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4347 break;
4348 }
4349 adev->gfx.config.gb_addr_config = gb_addr_config;
4350}
4351
/* Initialize one compute ring: record its hardware position
 * (ME/pipe/queue), assign a doorbell and name, and hook it to the
 * matching EOP interrupt line.
 */
static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* MEC 0 is ME 1 from the CP's point of view */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	/* one EOP interrupt type per (ME, pipe) pair */
	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* allocate the ring buffer and attach the EOP interrupt source */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;


	return 0;
}
4382
/* Software init: register interrupt sources, load microcode, allocate
 * RLC/MEC buffers, and create the gfx and compute rings.
 */
static int gfx_v7_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, j, k, r, ring_id;

	/* KAVERI has two MECs; the other CIK parts have one */
	switch (adev->asic_type) {
	case CHIP_KAVERI:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}
	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP event (legacy interrupt source id 181) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* privileged register fault (id 184) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* privileged instruction fault (id 185) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	gfx_v7_0_scratch_init(adev);

	r = gfx_v7_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate MEC buffers (HPD etc.) */
	r = gfx_v7_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring(s) */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues; the queue loop is outside the pipe
	 * loop so rings are spread across pipes first
	 */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v7_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v7_0_gpu_early_init(adev);

	return r;
}
4477
/* Software teardown: free GDS BOs, destroy all rings, release compute,
 * RLC and MEC resources, the clear-state and CP-table BOs, and drop
 * the loaded microcode.
 */
static int gfx_v7_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	gfx_v7_0_cp_compute_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	gfx_v7_0_mec_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* the CP table BO only exists when powergating needs it */
	if (adev->gfx.rlc.cp_table_size) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v7_0_free_microcode(adev);

	return 0;
}
4507
4508static int gfx_v7_0_hw_init(void *handle)
4509{
4510 int r;
4511 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4512
4513 gfx_v7_0_constants_init(adev);
4514
4515
4516 r = adev->gfx.rlc.funcs->resume(adev);
4517 if (r)
4518 return r;
4519
4520 r = gfx_v7_0_cp_resume(adev);
4521 if (r)
4522 return r;
4523
4524 return r;
4525}
4526
/* Hardware teardown: mask the fault interrupts, disable the CP, stop
 * the RLC, then tear down powergating state.
 */
static int gfx_v7_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v7_0_cp_enable(adev, false);
	adev->gfx.rlc.funcs->stop(adev);
	gfx_v7_0_fini_pg(adev);

	return 0;
}
4539
/* Suspend is a full gfx hardware teardown; hw_fini takes the same
 * opaque handle, so just forward it.
 */
static int gfx_v7_0_suspend(void *handle)
{
	return gfx_v7_0_hw_fini(handle);
}
4546
/* Resume is a full gfx hardware re-init; hw_init takes the same opaque
 * handle, so just forward it.
 */
static int gfx_v7_0_resume(void *handle)
{
	return gfx_v7_0_hw_init(handle);
}
4553
4554static bool gfx_v7_0_is_idle(void *handle)
4555{
4556 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4557
4558 if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4559 return false;
4560 else
4561 return true;
4562}
4563
4564static int gfx_v7_0_wait_for_idle(void *handle)
4565{
4566 unsigned i;
4567 u32 tmp;
4568 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4569
4570 for (i = 0; i < adev->usec_timeout; i++) {
4571
4572 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4573
4574 if (!tmp)
4575 return 0;
4576 udelay(1);
4577 }
4578 return -ETIMEDOUT;
4579}
4580
/* Soft-reset the gfx block.  Inspect GRBM/SRBM status registers to
 * decide which reset bits are needed, then halt the CP/RLC and pulse
 * the reset bits with settling delays.
 */
static int gfx_v7_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy gfx engine means CP + GFX need a reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
			GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
	}

	/* GRBM_STATUS2: RLC busy -> reset the RLC too */
	tmp = RREG32(mmGRBM_STATUS2);
	if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;

	/* SRBM_STATUS: pending GRBM request -> reset the GRBM */
	tmp = RREG32(mmSRBM_STATUS);
	if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* disable powergating and clockgating before the reset */
		gfx_v7_0_fini_pg(adev);
		gfx_v7_0_update_cg(adev, false);

		/* stop the rlc */
		adev->gfx.rlc.funcs->stop(adev);

		/* halt GFX parsing/prefetching (ME/PFP/CE) */
		WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);

		/* halt MEC parsing/prefetching */
		WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);

		if (grbm_soft_reset) {
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			/* read back to make sure the write landed */
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			/* release the reset bits */
			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			/* read back to make sure the write landed */
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			/* release the reset bits */
			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* wait for things to settle */
		udelay(50);
	}
	return 0;
}
4659
4660static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4661 enum amdgpu_interrupt_state state)
4662{
4663 u32 cp_int_cntl;
4664
4665 switch (state) {
4666 case AMDGPU_IRQ_STATE_DISABLE:
4667 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4668 cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4669 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4670 break;
4671 case AMDGPU_IRQ_STATE_ENABLE:
4672 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4673 cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
4674 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4675 break;
4676 default:
4677 break;
4678 }
4679}
4680
/* Enable or disable the timestamp (EOP) interrupt for one compute
 * pipe.  Only ME 1 (the first MEC) is supported here; any other ME or
 * pipe value is rejected with a debug message.
 */
static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/* pick the per-pipe interrupt control register for ME1 */
	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
4731
4732static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4733 struct amdgpu_irq_src *src,
4734 unsigned type,
4735 enum amdgpu_interrupt_state state)
4736{
4737 u32 cp_int_cntl;
4738
4739 switch (state) {
4740 case AMDGPU_IRQ_STATE_DISABLE:
4741 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4742 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4743 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4744 break;
4745 case AMDGPU_IRQ_STATE_ENABLE:
4746 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4747 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
4748 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4749 break;
4750 default:
4751 break;
4752 }
4753
4754 return 0;
4755}
4756
4757static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4758 struct amdgpu_irq_src *src,
4759 unsigned type,
4760 enum amdgpu_interrupt_state state)
4761{
4762 u32 cp_int_cntl;
4763
4764 switch (state) {
4765 case AMDGPU_IRQ_STATE_DISABLE:
4766 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4767 cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4768 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4769 break;
4770 case AMDGPU_IRQ_STATE_ENABLE:
4771 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4772 cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
4773 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4774 break;
4775 default:
4776 break;
4777 }
4778
4779 return 0;
4780}
4781
/* Dispatch an EOP interrupt state change to the gfx ring or to the
 * matching (ME, pipe) compute interrupt control.
 */
static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
4820
/* EOP interrupt handler.  ring_id bits [3:2] select the ME (0 = gfx,
 * 1/2 = MEC), bits [1:0] the pipe; fence processing is kicked on every
 * ring matching that (ME, pipe).
 */
static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id;
	struct amdgpu_ring *ring;
	int i;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if ((ring->me == me_id) && (ring->pipe == pipe_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
4847
/* Common fault path for privileged reg/instruction interrupts: decode
 * the (ME, pipe) from ring_id like the EOP handler and signal a
 * scheduler fault on the affected ring(s).
 */
static void gfx_v7_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	struct amdgpu_ring *ring;
	u8 me_id, pipe_id;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if ((ring->me == me_id) && (ring->pipe == pipe_id))
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}
4871
/* Privileged register fault handler: log and fault the affected ring. */
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v7_0_fault(adev, entry);
	return 0;
}
4880
/* Privileged instruction fault handler: log and fault the affected ring. */
static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v7_0_fault(adev, entry);
	return 0;
}
4890
4891static int gfx_v7_0_set_clockgating_state(void *handle,
4892 enum amd_clockgating_state state)
4893{
4894 bool gate = false;
4895 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4896
4897 if (state == AMD_CG_STATE_GATE)
4898 gate = true;
4899
4900 gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4901
4902 if (gate) {
4903 gfx_v7_0_enable_mgcg(adev, true);
4904 gfx_v7_0_enable_cgcg(adev, true);
4905 } else {
4906 gfx_v7_0_enable_cgcg(adev, false);
4907 gfx_v7_0_enable_mgcg(adev, false);
4908 }
4909 gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4910
4911 return 0;
4912}
4913
4914static int gfx_v7_0_set_powergating_state(void *handle,
4915 enum amd_powergating_state state)
4916{
4917 bool gate = false;
4918 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4919
4920 if (state == AMD_PG_STATE_GATE)
4921 gate = true;
4922
4923 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4924 AMD_PG_SUPPORT_GFX_SMG |
4925 AMD_PG_SUPPORT_GFX_DMG |
4926 AMD_PG_SUPPORT_CP |
4927 AMD_PG_SUPPORT_GDS |
4928 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4929 gfx_v7_0_update_gfx_pg(adev, gate);
4930 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
4931 gfx_v7_0_enable_cp_pg(adev, gate);
4932 gfx_v7_0_enable_gds_pg(adev, gate);
4933 }
4934 }
4935
4936 return 0;
4937}
4938
/* IP-block level callbacks: lifecycle (init/fini/suspend/resume),
 * idle/reset handling and clock/power gating control.
 */
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
	.name = "gfx_v7_0",
	.early_init = gfx_v7_0_early_init,
	.late_init = gfx_v7_0_late_init,
	.sw_init = gfx_v7_0_sw_init,
	.sw_fini = gfx_v7_0_sw_fini,
	.hw_init = gfx_v7_0_hw_init,
	.hw_fini = gfx_v7_0_hw_fini,
	.suspend = gfx_v7_0_suspend,
	.resume = gfx_v7_0_resume,
	.is_idle = gfx_v7_0_is_idle,
	.wait_for_idle = gfx_v7_0_wait_for_idle,
	.soft_reset = gfx_v7_0_soft_reset,
	.set_clockgating_state = gfx_v7_0_set_clockgating_state,
	.set_powergating_state = gfx_v7_0_set_powergating_state,
};
4955
/* Ring callbacks for the gfx ring.  The emit_frame_size terms appear to
 * correspond to the emit callbacks below (dword budgets per frame) —
 * NOTE(review): mapping inferred from the callback names, confirm
 * against the emit implementations.
 */
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v7_0_ring_get_rptr,
	.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gds switch */
		7 + /* hdp flush */
		5 + /* hdp invalidate */
		12 + 12 + 12 + /* fence x3 (user fence, vm fence) */
		7 + 4 + /* pipeline sync */
		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* vm flush */
		3 + 4, /* cntxcntl */
	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
	.test_ring = gfx_v7_0_ring_test_ring,
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
	.soft_recovery = gfx_v7_0_ring_soft_recovery,
};
4987
/* Ring callbacks for the compute (MEC) rings.  The emit_frame_size
 * terms appear to correspond to the emit callbacks below (dword
 * budgets per frame) — NOTE(review): mapping inferred from the
 * callback names, confirm against the emit implementations.
 */
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v7_0_ring_get_rptr,
	.get_wptr = gfx_v7_0_ring_get_wptr_compute,
	.set_wptr = gfx_v7_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gds switch */
		7 + /* hdp flush */
		5 + /* hdp invalidate */
		7 + /* pipeline sync */
		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* vm flush */
		7 + 7 + 7, /* fence x3 (user fence, vm fence) */
	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
	.test_ring = gfx_v7_0_ring_test_ring,
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
};
5016
5017static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
5018{
5019 int i;
5020
5021 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5022 adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
5023 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5024 adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
5025}
5026
/* EOP (end-of-pipe) interrupt source: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
	.set = gfx_v7_0_set_eop_interrupt_state,
	.process = gfx_v7_0_eop_irq,
};
5031
/* Privileged register fault interrupt source: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
	.set = gfx_v7_0_set_priv_reg_fault_state,
	.process = gfx_v7_0_priv_reg_irq,
};
5036
/* Privileged instruction fault interrupt source: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
	.set = gfx_v7_0_set_priv_inst_fault_state,
	.process = gfx_v7_0_priv_inst_irq,
};
5041
/* Register the interrupt source tables and their type counts.  The EOP
 * source exposes one type per CP ring; the fault sources have a single
 * type each.
 */
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
}
5053
/* Read back the GDS size programmed for VMID0 and split the GDS, GWS
 * and OA resources into gfx and compute (CS) partitions.  The split
 * differs for the 64KB-GDS configuration vs. smaller parts.
 */
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
{
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
5081
5082
/* Gather the CU topology: walk every SE/SH, apply user CU disable
 * masks, read the active-CU bitmap and derive the always-on CU mask
 * and totals into adev->gfx.cu_info.
 */
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	/* one disable mask per (SE, SH), for up to 4 SEs x 2 SHs */
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	/* APUs keep only 2 CUs per SH always-on */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* target this SE/SH for the indexed register access */
			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v7_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast mode */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
5138
/* GFX 7.0 IP block descriptor. */
const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5147
/* GFX 7.1 IP block descriptor (shares the 7.0 callbacks). */
const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5156
/* GFX 7.2 IP block descriptor (shares the 7.0 callbacks). */
const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 2,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5165
/* GFX 7.3 IP block descriptor (shares the 7.0 callbacks). */
const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 7,
	.minor = 3,
	.rev = 0,
	.funcs = &gfx_v7_0_ip_funcs,
};
5174