#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "oss/osssys_4_0_offset.h"
#include "oss/osssys_4_0_sh_mask.h"
#include "soc15_common.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "gfx_v9_0.h"
#include "amdgpu_amdkfd_gfx_v9.h"

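/*
 * Values written to mmCP_HQD_DEQUEUE_REQUEST by kgd_gfx_v9_hqd_destroy() to
 * ask the CP to dequeue an HQD: do nothing, drain the pipe, reset the waves,
 * or save the wave state.
 */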
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

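/*
 * SRBM/GRBM select helpers: lock_srbm()/unlock_srbm() wrap soc15_grbm_select()
 * with adev->srbm_mutex held so that per-MEC/pipe/queue/VMID register accesses
 * are serialized. acquire_queue()/release_queue() translate a KFD pipe_id into
 * the MEC instance and pipe expected by the GRBM select.
 */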
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
			uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

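/*
 * Program the per-VMID shared-memory configuration (SH_MEM_CONFIG and
 * SH_MEM_BASES) while the VMID is selected through the SRBM.
 */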
void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 is not used on GFX9, so sh_mem_ape1_base/limit are ignored */

	unlock_srbm(kgd);
}

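/*
 * Establish the VMID <-> PASID mapping for both the GC hub (ATC_VMID0..15
 * register block) and the MM hub (ATC_VMID16..31 block), and mirror it into
 * the IH VMID lookup tables so interrupt payloads can be routed back to the
 * right process. Each ATHUB write is confirmed by polling the corresponding
 * bit in ATC_VMID_PASID_MAPPING_UPDATE_STATUS.
 */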
int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * A zero PASID clears the mapping; otherwise program the PASID
	 * together with the VALID bit.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	/* Program the GC hub mapping and wait for the update to take effect */
	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);

	/* Mirror the mapping into the IH lookup table for the GC hub */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	/* Repeat for the MM hub, which uses the VMID16..31 register block */
	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << (vmid + 16))))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << (vmid + 16));

	/* Mirror the mapping into the IH lookup table for the MM hub */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
	       pasid_mapping);

	return 0;
}

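/*
 * Enable the CPC interrupt sources KFD relies on (time-stamp and opcode-error
 * interrupts) for the given compute pipe.
 */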
int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		fallthrough;
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

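/*
 * Load a compute queue: copy the HQD register image out of the MQD into the
 * CP_HQD_* registers for the selected pipe/queue, enable the doorbell, seed
 * the hardware write pointer, and finally set CP_HQD_ACTIVE to start the
 * queue.
 */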
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers are a contiguous block starting at CP_MQD_BASE_ADDR */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);

	/* Activate doorbell logic before triggering WPTR polling */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* The user-space write pointer cannot be read directly here
		 * (this may run from a work queue without the user mm), so
		 * let the CP poll it from memory instead. Seed the hardware
		 * WPTR with a guess reconstructed from the saved 32-bit RPTR
		 * and the saved 64-bit WPTR: the low bits come from the RPTR
		 * (assuming the queue did not overflow), the remaining bits
		 * from the saved WPTR, adding one queue size if the WPTR
		 * wrapped.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uintptr_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}

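/*
 * Load the HIQ (the scheduler's own queue) by submitting a MAP_QUEUES packet
 * on the KIQ ring rather than programming the HQD registers directly.
 */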
int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v9_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* queue_sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* vmid */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(kgd);

	return r;
}

int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

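/*
 * Restore an SDMA user queue from its MQD: disable the ring buffer, wait for
 * the RLC queue to go idle, reprogram the doorbell and ring-buffer registers,
 * restore the write pointer (from user space if it is still mapped), and
 * re-enable the ring buffer.
 */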
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		    high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

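/*
 * Preempt a compute queue: issue the requested dequeue type through
 * CP_HQD_DEQUEUE_REQUEST and poll CP_HQD_ACTIVE until the queue goes
 * inactive or the caller's timeout (in milliseconds) expires.
 */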
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	if (amdgpu_in_reset(adev))
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
		type = SAVE_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

/* Address watch is not handled through this interface on GFX9; stubs only */
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

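/*
 * Execute an SQ command (used for wave control during debugging): select the
 * requested shader engine/array via GRBM_GFX_INDEX, write the command to
 * SQ_CMD, then restore GRBM_GFX_INDEX to broadcast mode.
 */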
int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
			uint32_t vmid, uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	/* The page table base must be programmed into both the MM hub and
	 * the GC hub for this VMID.
	 */
	adev->mmhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);

	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
{
	mutex_lock(&adev->srbm_mutex);
	mutex_lock(&adev->grbm_idx_mutex);
}

static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
{
	mutex_unlock(&adev->grbm_idx_mutex);
	mutex_unlock(&adev->srbm_mutex);
}

/**
 * get_wave_count: Read the number of waves in flight for a particular compute
 * queue, together with the VMID the queue is running under.
 *
 * @adev: device whose registers are read
 * @queue_idx: linear queue index across all pipes of MEC0
 * @wave_cnt: output, number of waves in flight on the queue
 * @vmid: output, VMID of the queue (0xFF if no waves are in flight)
 */
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
		int *wave_cnt, int *vmid)
{
	int pipe_idx;
	int queue_slot;
	unsigned int reg_val;

	/* Select the queue's pipe/slot on MEC0 through GRBM, then read the
	 * wave count from SPI_CSQ_WF_ACTIVE_COUNT_0 and, if it is non-zero,
	 * the owning VMID from CP_HQD_VMID.
	 */
	*vmid = 0xFF;
	*wave_cnt = 0;
	pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
	queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
	soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
	reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
			 queue_slot);
	*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
	if (*wave_cnt != 0)
		*vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
			 CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
}

/**
 * kgd_gfx_v9_get_cu_occupancy: Aggregate the number of waves in flight that
 * belong to the process identified by @pasid, by walking every shader
 * engine/array and every slotted compute queue of the device.
 *
 * @kgd: device handle
 * @pasid: PASID of the process being queried
 * @pasid_wave_cnt: output, total waves in flight for the process
 * @max_waves_per_cu: output, maximum number of waves a compute unit can host
 *
 * For each shader engine/array, SPI_CSQ_WF_ACTIVE_STATUS provides a bitmap of
 * slotted queues; for each slotted queue that belongs to KFD (per the MEC
 * queue bitmap), get_wave_count() reads the wave count and owning VMID, and
 * the count is accumulated when the VMID's entry in the IH VMID LUT matches
 * @pasid. With queue oversubscription a VMID can be remapped to a different
 * PASID between reads, so the result is a best-effort snapshot.
 */
void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
		int *pasid_wave_cnt, int *max_waves_per_cu)
{
	int qidx;
	int vmid;
	int se_idx;
	int sh_idx;
	int se_cnt;
	int sh_cnt;
	int wave_cnt;
	int queue_map;
	int pasid_tmp;
	int max_queue_cnt;
	int vmid_wave_cnt = 0;
	struct amdgpu_device *adev;
	DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);

	adev = get_amdgpu_device(kgd);
	lock_spi_csq_mutexes(adev);
	soc15_grbm_select(adev, 1, 0, 0, 0);

	/*
	 * Build the bitmap of CP queues available to KFD (the complement of
	 * the queues reserved by the amdgpu driver), then iterate through the
	 * shader engines and arrays of the device.
	 */
	bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
			  KGD_MAX_QUEUES);
	max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
			adev->gfx.mec.num_queue_per_pipe;
	sh_cnt = adev->gfx.config.max_sh_per_se;
	se_cnt = adev->gfx.config.max_shader_engines;
	for (se_idx = 0; se_idx < se_cnt; se_idx++) {
		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {

			gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
			queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
					   mmSPI_CSQ_WF_ACTIVE_STATUS));

			/*
			 * SPI_CSQ_WF_ACTIVE_STATUS is a bitmap of slotted
			 * queues for the selected shader engine/array. Walk
			 * every possible queue slot of MEC0 and skip slots
			 * that are either not owned by KFD or not slotted.
			 */
			for (qidx = 0; qidx < max_queue_cnt; qidx++) {

				if (!test_bit(qidx, cp_queue_bitmap))
					continue;

				if (!(queue_map & (1 << qidx)))
					continue;

				/* Get number of waves in flight and aggregate them */
				get_wave_count(adev, qidx, &wave_cnt, &vmid);
				if (wave_cnt != 0) {
					pasid_tmp =
					  RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
						 mmIH_VMID_0_LUT) + vmid);
					if (pasid_tmp == pasid)
						vmid_wave_cnt += wave_cnt;
				}
			}
		}
	}

	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	soc15_grbm_select(adev, 0, 0, 0, 0);
	unlock_spi_csq_mutexes(adev);

	/* Update the output parameters */
	*pasid_wave_cnt = vmid_wave_cnt;
	*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
				adev->gfx.cu_info.max_waves_per_simd;
}

void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	/*
	 * Program TBA registers
	 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
		lower_32_bits(tba_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
		upper_32_bits(tba_addr >> 8));

	/*
	 * Program TMA registers
	 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
		lower_32_bits(tma_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
		upper_32_bits(tma_addr >> 8));

	unlock_srbm(kgd);
}
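
/* KFD -> KGD interface functions exported for GFX9 family devices */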
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
};