#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "oss/osssys_4_0_offset.h"
#include "oss/osssys_4_0_sh_mask.h"
#include "soc15_common.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "gfx_v9_0.h"

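/*
 * Dequeue request types written to CP_HQD_DEQUEUE_REQUEST when a compute
 * queue is preempted; see kgd_gfx_v9_hqd_destroy().
 */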
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

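/*
 * lock_srbm()/unlock_srbm() serialize access to the GRBM queue-select
 * state: a MEC/pipe/queue/VMID aperture is selected under
 * adev->srbm_mutex and the default selection (0, 0, 0, 0) is restored
 * before the mutex is released.
 */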
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

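/*
 * Program the per-VMID shader memory configuration (SH_MEM_CONFIG and
 * SH_MEM_BASES). The APE1 registers no longer exist on GFX9, so the ape1
 * arguments are accepted only for interface compatibility.
 */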
void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);

	unlock_srbm(kgd);
}

int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * A PASID of 0 clears the mapping; otherwise set the VALID bit
	 * along with the PASID value.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	/*
	 * Program the first bank of ATC mapping registers, wait for the
	 * ATC to signal that the update for this VMID has taken effect,
	 * then acknowledge it by writing the status bit back.
	 */
	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
			ATHUB, 0,
			mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);

	/* Mirror the mapping into the IH VMID-to-PASID look-up table */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	/*
	 * Repeat for the second bank of mapping registers and the
	 * corresponding update-status bit (vmid + 16).
	 */
	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
			ATHUB, 0,
			mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << (vmid + 16))))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << (vmid + 16));

	/* ... and mirror it into the second IH look-up table */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
	       pasid_mapping);
	return 0;
}

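/*
 * Enable time-stamp and opcode-error interrupts for the given compute
 * pipe. The CP_INT_CNTL_RING0 field masks are reused here; CPC_INT_CNTL
 * uses the same bit positions for these fields.
 */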
int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

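/*
 * Return the dword offset of the RLC queue register block for the given
 * SDMA engine and queue, relative to the mmSDMA0_RLC0_* register
 * definitions, so callers can simply add those constants to it.
 */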
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		fallthrough;
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

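/*
 * Load a compute queue's MQD into the HQD registers and activate it. When
 * a user-mode write pointer is supplied, the CP is programmed to poll it
 * from memory rather than the driver dereferencing it here, since the
 * user address space may not be accessible in this context.
 */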
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_PQ_WPTR_HI */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);

	/* Enable the doorbell so user-mode submissions reach the CP */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read the user-mode wptr directly here; the user
		 * address space may not be accessible from this context.
		 * Instead, let the CP poll the wptr from memory.
		 *
		 * Guess a 64-bit wptr to start from: the low bits (modulo
		 * the queue size) come from the saved 32-bit rptr, assuming
		 * the queue has not overflowed; if the saved wptr has
		 * already wrapped past the rptr within the current window,
		 * add one queue size. The remaining upper bits come from
		 * the saved 64-bit wptr.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uintptr_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}

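/*
 * Map the HIQ by submitting a MAP_QUEUES packet on the KIQ ring instead of
 * programming the HQD registers directly.
 */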
int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v9_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) |
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel 1: HIQ */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(kgd);

	return r;
}

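/*
 * Dump the HQD register range of a compute queue into an array of
 * (register offset, value) pairs allocated here and returned to the
 * caller.
 */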
int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

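/*
 * Load an SDMA queue's MQD into the RLC queue registers: disable the ring
 * buffer, wait for the engine context to go idle, restore the doorbell and
 * ring pointers (using the user-mode wptr if it can be read), then
 * re-enable the ring buffer.
 */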
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

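/*
 * Dump the SDMA RLC queue registers for debugging. HQD_N_REGS must match
 * the combined length of the four register ranges read below.
 */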
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

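/*
 * A compute queue slot is considered occupied if the HQD is active and its
 * PQ base matches the queue address being checked.
 */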
bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		    high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

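/* An SDMA queue is occupied while its ring buffer is still enabled. */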
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

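/*
 * Preempt a compute queue by writing the requested dequeue type to
 * CP_HQD_DEQUEUE_REQUEST and waiting up to utimeout milliseconds for the
 * HQD to deactivate.
 */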
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	if (amdgpu_in_reset(adev))
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

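/*
 * Stop an SDMA queue: disable its ring buffer, wait for the context to go
 * idle, then save the final read pointers back into the MQD.
 */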
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

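/*
 * Read back the ATC mapping for a VMID; returns true if the mapping is
 * currently valid and stores the PASID in *p_pasid.
 */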
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

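/*
 * The address watch interface is not implemented for GFX9 here; the
 * functions below are no-op stubs.
 */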
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

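/*
 * Issue an SQ_CMD (e.g. to halt or kill waves) on the shader engines
 * selected by gfx_index_val, then restore GRBM_GFX_INDEX to broadcast
 * mode so later register writes reach all SEs/SHs/instances again.
 */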
int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

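/*
 * Program the page table base for a KFD-owned VMID in both the MM hub and
 * the GFX hub, so all clients of that VMID translate through the same
 * page tables.
 */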
void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
			uint32_t vmid, uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	adev->mmhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);

	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

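/*
 * Helpers that serialize GRBM queue selection and SE/SH indexing while the
 * SPI CSQ wave counters are sampled below.
 */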
static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
{
	mutex_lock(&adev->srbm_mutex);
	mutex_lock(&adev->grbm_idx_mutex);
}

static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
{
	mutex_unlock(&adev->grbm_idx_mutex);
	mutex_unlock(&adev->srbm_mutex);
}

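/*
 * get_wave_count: Read the SPI CSQ counters to get the number of waves in
 * flight for a given compute queue. If any waves are in flight, the VMID
 * the queue is bound to is returned as well (otherwise *vmid is 0xFF).
 *
 * @adev: amdgpu device handle
 * @queue_idx: queue index within the MEC (pipe * queues-per-pipe + slot)
 * @wave_cnt: output, number of waves in flight
 * @vmid: output, VMID owning the queue when waves are in flight
 *
 * The caller must hold the SRBM and GRBM-index mutexes; the GRBM queue
 * selection is changed here and restored by the caller.
 */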
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
		int *wave_cnt, int *vmid)
{
	int pipe_idx;
	int queue_slot;
	unsigned int reg_val;

	/*
	 * Program GRBM with the appropriate MEC, pipe, queue and VMID
	 * selection, then read the wave count for that queue slot. Only
	 * look up the owning VMID when waves are actually in flight.
	 */
	*vmid = 0xFF;
	*wave_cnt = 0;
	pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
	queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
	soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
	reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
			 queue_slot);
	*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
	if (*wave_cnt != 0)
		*vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
			 CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
}

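/*
 * kgd_gfx_v9_get_cu_occupancy: Count the waves in flight that belong to
 * the process identified by @pasid and report how many waves a single CU
 * can host.
 *
 * For every shader engine and shader array, the SPI CSQ active-status mask
 * is read to find compute queues with waves in flight. For each such queue
 * the wave count and owning VMID are read, the VMID is translated to a
 * PASID through the IH VMID look-up table, and counts belonging to @pasid
 * are accumulated into *pasid_wave_cnt. *max_waves_per_cu is derived from
 * the number of SIMDs per CU and the waves each SIMD can hold.
 *
 * The queue-map interpretation assumes the GFX9 layout of four pipes per
 * MEC with eight queues per pipe.
 */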
static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
		int *pasid_wave_cnt, int *max_waves_per_cu)
{
	int qidx;
	int vmid;
	int se_idx;
	int sh_idx;
	int se_cnt;
	int sh_cnt;
	int wave_cnt;
	int queue_map;
	int pasid_tmp;
	int max_queue_cnt;
	int vmid_wave_cnt = 0;
	struct amdgpu_device *adev;
	DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);

	adev = get_amdgpu_device(kgd);
	lock_spi_csq_mutexes(adev);
	soc15_grbm_select(adev, 1, 0, 0, 0);

	/*
	 * Iterate through the shader engines and shader arrays of the
	 * device to get the number of waves in flight.
	 */
	bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
			  KGD_MAX_QUEUES);
	max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
			adev->gfx.mec.num_queue_per_pipe;
	sh_cnt = adev->gfx.config.max_sh_per_se;
	se_cnt = adev->gfx.config.max_shader_engines;
	for (se_idx = 0; se_idx < se_cnt; se_idx++) {
		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {

			gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
			queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
					   mmSPI_CSQ_WF_ACTIVE_STATUS));

			/*
			 * Each set bit in queue_map marks a queue slot with
			 * waves in flight on this SE/SH.
			 */
			for (qidx = 0; qidx < max_queue_cnt; qidx++) {

				/* Skip queues that are not associated with
				 * compute functions.
				 */
				if (!test_bit(qidx, cp_queue_bitmap))
					continue;

				if (!(queue_map & (1 << qidx)))
					continue;

				/* Get number of waves in flight and aggregate them */
				get_wave_count(adev, qidx, &wave_cnt, &vmid);
				if (wave_cnt != 0) {
					pasid_tmp =
					  RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
						 mmIH_VMID_0_LUT) + vmid);
					if (pasid_tmp == pasid)
						vmid_wave_cnt += wave_cnt;
				}
			}
		}
	}

	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	soc15_grbm_select(adev, 0, 0, 0, 0);
	unlock_spi_csq_mutexes(adev);

	/* Update the output parameters and return */
	*pasid_wave_cnt = vmid_wave_cnt;
	*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
				adev->gfx.cu_info.max_waves_per_simd;
}

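/* KFD-to-KGD interface functions for GFX v9 ASICs */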
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
};