1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/types.h>
25#include <linux/kernel.h>
26#include <linux/log2.h>
27#include <linux/sched.h>
28#include <linux/slab.h>
29#include <linux/mutex.h>
30#include <linux/device.h>
31
32#include "kfd_pm4_headers.h"
33#include "kfd_pm4_headers_diq.h"
34#include "kfd_kernel_queue.h"
35#include "kfd_priv.h"
36#include "kfd_pm4_opcodes.h"
37#include "cik_regs.h"
38#include "kfd_dbgmgr.h"
39#include "kfd_dbgdev.h"
40#include "kfd_device_queue_manager.h"
41#include "../../radeon/cik_reg.h"
42
43static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
44{
45 dev->kfd2kgd->address_watch_disable(dev->kgd);
46}
47
48static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
49 unsigned int pasid, uint64_t vmid0_address,
50 uint32_t *packet_buff, size_t size_in_bytes)
51{
52 struct pm4__release_mem *rm_packet;
53 struct pm4__indirect_buffer_pasid *ib_packet;
54 struct kfd_mem_obj *mem_obj;
55 size_t pq_packets_size_in_bytes;
56 union ULARGE_INTEGER *largep;
57 union ULARGE_INTEGER addr;
58 struct kernel_queue *kq;
59 uint64_t *rm_state;
60 unsigned int *ib_packet_buff;
61 int status;
62
63 if (WARN_ON(!size_in_bytes))
64 return -EINVAL;
65
66 kq = dbgdev->kq;
67
68 pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
69 sizeof(struct pm4__indirect_buffer_pasid);
70
71
72
73
74
75
76 status = kq->ops.acquire_packet_buffer(kq,
77 pq_packets_size_in_bytes / sizeof(uint32_t),
78 &ib_packet_buff);
79 if (status) {
80 pr_err("acquire_packet_buffer failed\n");
81 return status;
82 }
83
84 memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
85
86 ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
87
88 ib_packet->header.count = 3;
89 ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
90 ib_packet->header.type = PM4_TYPE_3;
91
92 largep = (union ULARGE_INTEGER *) &vmid0_address;
93
94 ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
95 ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
96
97 ib_packet->control = (1 << 23) | (1 << 31) |
98 ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
99
100 ib_packet->bitfields5.pasid = pasid;
101
102
103
104
105
106
107
108
109
110 rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
111 (sizeof(struct pm4__indirect_buffer_pasid) /
112 sizeof(unsigned int)));
113
114 status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
115 &mem_obj);
116
117 if (status) {
118 pr_err("Failed to allocate GART memory\n");
119 kq->ops.rollback_packet(kq);
120 return status;
121 }
122
123 rm_state = (uint64_t *) mem_obj->cpu_ptr;
124
125 *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
126
127 rm_packet->header.opcode = IT_RELEASE_MEM;
128 rm_packet->header.type = PM4_TYPE_3;
129 rm_packet->header.count = sizeof(struct pm4__release_mem) /
130 sizeof(unsigned int) - 2;
131
132 rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
133 rm_packet->bitfields2.event_index =
134 event_index___release_mem__end_of_pipe;
135
136 rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
137 rm_packet->bitfields2.atc = 0;
138 rm_packet->bitfields2.tc_wb_action_ena = 1;
139
140 addr.quad_part = mem_obj->gpu_addr;
141
142 rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
143 rm_packet->address_hi = addr.u.high_part;
144
145 rm_packet->bitfields3.data_sel =
146 data_sel___release_mem__send_64_bit_data;
147
148 rm_packet->bitfields3.int_sel =
149 int_sel___release_mem__send_data_after_write_confirm;
150
151 rm_packet->bitfields3.dst_sel =
152 dst_sel___release_mem__memory_controller;
153
154 rm_packet->data_lo = QUEUESTATE__ACTIVE;
155
156 kq->ops.submit_packet(kq);
157
158
159 status = amdkfd_fence_wait_timeout(
160 (unsigned int *) rm_state,
161 QUEUESTATE__ACTIVE, 1500);
162
163 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
164
165 return status;
166}
167
168static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
169{
170
171
172
173
174
175 dbgdev->kq = NULL;
176
177 return 0;
178}
179
180static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
181{
182 struct queue_properties properties;
183 unsigned int qid;
184 struct kernel_queue *kq = NULL;
185 int status;
186
187 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
188 &properties, 0, KFD_QUEUE_TYPE_DIQ,
189 &qid);
190
191 if (status) {
192 pr_err("Failed to create DIQ\n");
193 return status;
194 }
195
196 pr_debug("DIQ Created with queue id: %d\n", qid);
197
198 kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
199
200 if (!kq) {
201 pr_err("Error getting DIQ\n");
202 pqm_destroy_queue(dbgdev->pqm, qid);
203 return -EFAULT;
204 }
205
206 dbgdev->kq = kq;
207
208 return status;
209}
210
211static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
212{
213
214 dbgdev_address_watch_disable_nodiq(dbgdev->dev);
215 return 0;
216}
217
218static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
219{
220
221 int status;
222
223 status = pqm_destroy_queue(dbgdev->pqm,
224 dbgdev->kq->queue->properties.queue_id);
225 dbgdev->kq = NULL;
226
227 return status;
228}
229
230static void dbgdev_address_watch_set_registers(
231 const struct dbg_address_watch_info *adw_info,
232 union TCP_WATCH_ADDR_H_BITS *addrHi,
233 union TCP_WATCH_ADDR_L_BITS *addrLo,
234 union TCP_WATCH_CNTL_BITS *cntl,
235 unsigned int index, unsigned int vmid)
236{
237 union ULARGE_INTEGER addr;
238
239 addr.quad_part = 0;
240 addrHi->u32All = 0;
241 addrLo->u32All = 0;
242 cntl->u32All = 0;
243
244 if (adw_info->watch_mask)
245 cntl->bitfields.mask =
246 (uint32_t) (adw_info->watch_mask[index] &
247 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
248 else
249 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
250
251 addr.quad_part = (unsigned long long) adw_info->watch_address[index];
252
253 addrHi->bitfields.addr = addr.u.high_part &
254 ADDRESS_WATCH_REG_ADDHIGH_MASK;
255 addrLo->bitfields.addr =
256 (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
257
258 cntl->bitfields.mode = adw_info->watch_mode[index];
259 cntl->bitfields.vmid = (uint32_t) vmid;
260
261 cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
262
263 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
264 pr_debug("\t\t%20s %08x\n", "set reg add high :",
265 addrHi->bitfields.addr);
266 pr_debug("\t\t%20s %08x\n", "set reg add low :",
267 addrLo->bitfields.addr);
268}
269
270static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
271 struct dbg_address_watch_info *adw_info)
272{
273 union TCP_WATCH_ADDR_H_BITS addrHi;
274 union TCP_WATCH_ADDR_L_BITS addrLo;
275 union TCP_WATCH_CNTL_BITS cntl;
276 struct kfd_process_device *pdd;
277 unsigned int i;
278
279
280 pdd = kfd_get_process_device_data(dbgdev->dev,
281 adw_info->process);
282 if (!pdd) {
283 pr_err("Failed to get pdd for wave control no DIQ\n");
284 return -EFAULT;
285 }
286
287 addrHi.u32All = 0;
288 addrLo.u32All = 0;
289 cntl.u32All = 0;
290
291 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
292 (adw_info->num_watch_points == 0)) {
293 pr_err("num_watch_points is invalid\n");
294 return -EINVAL;
295 }
296
297 if (!adw_info->watch_mode || !adw_info->watch_address) {
298 pr_err("adw_info fields are not valid\n");
299 return -EINVAL;
300 }
301
302 for (i = 0; i < adw_info->num_watch_points; i++) {
303 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
304 &cntl, i, pdd->qpd.vmid);
305
306 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
307 pr_debug("\t\t%20s %08x\n", "register index :", i);
308 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
309 pr_debug("\t\t%20s %08x\n", "Address Low is :",
310 addrLo.bitfields.addr);
311 pr_debug("\t\t%20s %08x\n", "Address high is :",
312 addrHi.bitfields.addr);
313 pr_debug("\t\t%20s %08x\n", "Address high is :",
314 addrHi.bitfields.addr);
315 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
316 cntl.bitfields.mask);
317 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
318 cntl.bitfields.mode);
319 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
320 cntl.bitfields.vmid);
321 pr_debug("\t\t%20s %08x\n", "Control atc is :",
322 cntl.bitfields.atc);
323 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
324
325 pdd->dev->kfd2kgd->address_watch_execute(
326 dbgdev->dev->kgd,
327 i,
328 cntl.u32All,
329 addrHi.u32All,
330 addrLo.u32All);
331 }
332
333 return 0;
334}
335
336static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
337 struct dbg_address_watch_info *adw_info)
338{
339 struct pm4__set_config_reg *packets_vec;
340 union TCP_WATCH_ADDR_H_BITS addrHi;
341 union TCP_WATCH_ADDR_L_BITS addrLo;
342 union TCP_WATCH_CNTL_BITS cntl;
343 struct kfd_mem_obj *mem_obj;
344 unsigned int aw_reg_add_dword;
345 uint32_t *packet_buff_uint;
346 unsigned int i;
347 int status;
348 size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
349
350 unsigned int vmid = 0;
351
352 addrHi.u32All = 0;
353 addrLo.u32All = 0;
354 cntl.u32All = 0;
355
356 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
357 (adw_info->num_watch_points == 0)) {
358 pr_err("num_watch_points is invalid\n");
359 return -EINVAL;
360 }
361
362 if (!adw_info->watch_mode || !adw_info->watch_address) {
363 pr_err("adw_info fields are not valid\n");
364 return -EINVAL;
365 }
366
367 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
368
369 if (status) {
370 pr_err("Failed to allocate GART memory\n");
371 return status;
372 }
373
374 packet_buff_uint = mem_obj->cpu_ptr;
375
376 memset(packet_buff_uint, 0, ib_size);
377
378 packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
379
380 packets_vec[0].header.count = 1;
381 packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
382 packets_vec[0].header.type = PM4_TYPE_3;
383 packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
384 packets_vec[0].bitfields2.insert_vmid = 1;
385 packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
386 packets_vec[1].bitfields2.insert_vmid = 0;
387 packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
388 packets_vec[2].bitfields2.insert_vmid = 0;
389 packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
390 packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
391 packets_vec[3].bitfields2.insert_vmid = 1;
392
393 for (i = 0; i < adw_info->num_watch_points; i++) {
394 dbgdev_address_watch_set_registers(adw_info,
395 &addrHi,
396 &addrLo,
397 &cntl,
398 i,
399 vmid);
400
401 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
402 pr_debug("\t\t%20s %08x\n", "register index :", i);
403 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
404 pr_debug("\t\t%20s %p\n", "Add ptr is :",
405 adw_info->watch_address);
406 pr_debug("\t\t%20s %08llx\n", "Add is :",
407 adw_info->watch_address[i]);
408 pr_debug("\t\t%20s %08x\n", "Address Low is :",
409 addrLo.bitfields.addr);
410 pr_debug("\t\t%20s %08x\n", "Address high is :",
411 addrHi.bitfields.addr);
412 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
413 cntl.bitfields.mask);
414 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
415 cntl.bitfields.mode);
416 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
417 cntl.bitfields.vmid);
418 pr_debug("\t\t%20s %08x\n", "Control atc is :",
419 cntl.bitfields.atc);
420 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
421
422 aw_reg_add_dword =
423 dbgdev->dev->kfd2kgd->address_watch_get_offset(
424 dbgdev->dev->kgd,
425 i,
426 ADDRESS_WATCH_REG_CNTL);
427
428 packets_vec[0].bitfields2.reg_offset =
429 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
430
431 packets_vec[0].reg_data[0] = cntl.u32All;
432
433 aw_reg_add_dword =
434 dbgdev->dev->kfd2kgd->address_watch_get_offset(
435 dbgdev->dev->kgd,
436 i,
437 ADDRESS_WATCH_REG_ADDR_HI);
438
439 packets_vec[1].bitfields2.reg_offset =
440 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
441 packets_vec[1].reg_data[0] = addrHi.u32All;
442
443 aw_reg_add_dword =
444 dbgdev->dev->kfd2kgd->address_watch_get_offset(
445 dbgdev->dev->kgd,
446 i,
447 ADDRESS_WATCH_REG_ADDR_LO);
448
449 packets_vec[2].bitfields2.reg_offset =
450 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
451 packets_vec[2].reg_data[0] = addrLo.u32All;
452
453
454 if (adw_info->watch_address[i] > 0)
455 cntl.bitfields.valid = 1;
456 else
457 cntl.bitfields.valid = 0;
458
459 aw_reg_add_dword =
460 dbgdev->dev->kfd2kgd->address_watch_get_offset(
461 dbgdev->dev->kgd,
462 i,
463 ADDRESS_WATCH_REG_CNTL);
464
465 packets_vec[3].bitfields2.reg_offset =
466 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
467 packets_vec[3].reg_data[0] = cntl.u32All;
468
469 status = dbgdev_diq_submit_ib(
470 dbgdev,
471 adw_info->process->pasid,
472 mem_obj->gpu_addr,
473 packet_buff_uint,
474 ib_size);
475
476 if (status) {
477 pr_err("Failed to submit IB to DIQ\n");
478 break;
479 }
480 }
481
482 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
483 return status;
484}
485
486static int dbgdev_wave_control_set_registers(
487 struct dbg_wave_control_info *wac_info,
488 union SQ_CMD_BITS *in_reg_sq_cmd,
489 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
490{
491 int status = 0;
492 union SQ_CMD_BITS reg_sq_cmd;
493 union GRBM_GFX_INDEX_BITS reg_gfx_index;
494 struct HsaDbgWaveMsgAMDGen2 *pMsg;
495
496 reg_sq_cmd.u32All = 0;
497 reg_gfx_index.u32All = 0;
498 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
499
500 switch (wac_info->mode) {
501
502 case HSA_DBG_WAVEMODE_SINGLE:
503
504
505
506
507 reg_sq_cmd.bits.check_vmid = 1;
508 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
509 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
510 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
511
512 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
513 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
514 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
515
516 break;
517
518
519 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
520
521 reg_gfx_index.bits.sh_broadcast_writes = 1;
522 reg_gfx_index.bits.se_broadcast_writes = 1;
523 reg_gfx_index.bits.instance_broadcast_writes = 1;
524
525 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
526
527 break;
528
529
530 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
531
532 reg_sq_cmd.bits.check_vmid = 1;
533 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
534
535 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
536 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
537 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
538
539 break;
540
541 default:
542 return -EINVAL;
543 }
544
545 switch (wac_info->operand) {
546 case HSA_DBG_WAVEOP_HALT:
547 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
548 break;
549
550 case HSA_DBG_WAVEOP_RESUME:
551 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
552 break;
553
554 case HSA_DBG_WAVEOP_KILL:
555 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
556 break;
557
558 case HSA_DBG_WAVEOP_DEBUG:
559 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
560 break;
561
562 case HSA_DBG_WAVEOP_TRAP:
563 if (wac_info->trapId < MAX_TRAPID) {
564 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
565 reg_sq_cmd.bits.trap_id = wac_info->trapId;
566 } else {
567 status = -EINVAL;
568 }
569 break;
570
571 default:
572 status = -EINVAL;
573 break;
574 }
575
576 if (status == 0) {
577 *in_reg_sq_cmd = reg_sq_cmd;
578 *in_reg_gfx_index = reg_gfx_index;
579 }
580
581 return status;
582}
583
584static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
585 struct dbg_wave_control_info *wac_info)
586{
587
588 int status;
589 union SQ_CMD_BITS reg_sq_cmd;
590 union GRBM_GFX_INDEX_BITS reg_gfx_index;
591 struct kfd_mem_obj *mem_obj;
592 uint32_t *packet_buff_uint;
593 struct pm4__set_config_reg *packets_vec;
594 size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
595
596 reg_sq_cmd.u32All = 0;
597
598 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd,
599 ®_gfx_index);
600 if (status) {
601 pr_err("Failed to set wave control registers\n");
602 return status;
603 }
604
605
606 reg_sq_cmd.bits.vm_id = 0;
607
608 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
609
610 pr_debug("\t\t mode is: %u\n", wac_info->mode);
611 pr_debug("\t\t operand is: %u\n", wac_info->operand);
612 pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
613 pr_debug("\t\t msg value is: %u\n",
614 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
615 pr_debug("\t\t vmid is: N/A\n");
616
617 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
618 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
619 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
620 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
621 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
622 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
623 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
624
625 pr_debug("\t\t ibw is : %u\n",
626 reg_gfx_index.bitfields.instance_broadcast_writes);
627 pr_debug("\t\t ii is : %u\n",
628 reg_gfx_index.bitfields.instance_index);
629 pr_debug("\t\t sebw is : %u\n",
630 reg_gfx_index.bitfields.se_broadcast_writes);
631 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
632 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
633 pr_debug("\t\t sbw is : %u\n",
634 reg_gfx_index.bitfields.sh_broadcast_writes);
635
636 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
637
638 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
639
640 if (status != 0) {
641 pr_err("Failed to allocate GART memory\n");
642 return status;
643 }
644
645 packet_buff_uint = mem_obj->cpu_ptr;
646
647 memset(packet_buff_uint, 0, ib_size);
648
649 packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
650 packets_vec[0].header.count = 1;
651 packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
652 packets_vec[0].header.type = PM4_TYPE_3;
653 packets_vec[0].bitfields2.reg_offset =
654 GRBM_GFX_INDEX / (sizeof(uint32_t)) -
655 USERCONFIG_REG_BASE;
656
657 packets_vec[0].bitfields2.insert_vmid = 0;
658 packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
659
660 packets_vec[1].header.count = 1;
661 packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
662 packets_vec[1].header.type = PM4_TYPE_3;
663 packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
664 AMD_CONFIG_REG_BASE;
665
666 packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
667 packets_vec[1].bitfields2.insert_vmid = 1;
668 packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
669
670
671
672 reg_gfx_index.u32All = 0;
673 reg_gfx_index.bits.sh_broadcast_writes = 1;
674 reg_gfx_index.bits.instance_broadcast_writes = 1;
675 reg_gfx_index.bits.se_broadcast_writes = 1;
676
677
678 packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
679 packets_vec[2].bitfields2.reg_offset =
680 GRBM_GFX_INDEX / (sizeof(uint32_t)) -
681 USERCONFIG_REG_BASE;
682
683 packets_vec[2].bitfields2.insert_vmid = 0;
684 packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
685
686 status = dbgdev_diq_submit_ib(
687 dbgdev,
688 wac_info->process->pasid,
689 mem_obj->gpu_addr,
690 packet_buff_uint,
691 ib_size);
692
693 if (status)
694 pr_err("Failed to submit IB to DIQ\n");
695
696 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
697
698 return status;
699}
700
701static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
702 struct dbg_wave_control_info *wac_info)
703{
704 int status;
705 union SQ_CMD_BITS reg_sq_cmd;
706 union GRBM_GFX_INDEX_BITS reg_gfx_index;
707 struct kfd_process_device *pdd;
708
709 reg_sq_cmd.u32All = 0;
710
711
712 pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
713
714 if (!pdd) {
715 pr_err("Failed to get pdd for wave control no DIQ\n");
716 return -EFAULT;
717 }
718 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd,
719 ®_gfx_index);
720 if (status) {
721 pr_err("Failed to set wave control registers\n");
722 return status;
723 }
724
725
726
727 reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
728
729 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
730
731 pr_debug("\t\t mode is: %u\n", wac_info->mode);
732 pr_debug("\t\t operand is: %u\n", wac_info->operand);
733 pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
734 pr_debug("\t\t msg value is: %u\n",
735 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
736 pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
737
738 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
739 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
740 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
741 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
742 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
743 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
744 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
745
746 pr_debug("\t\t ibw is : %u\n",
747 reg_gfx_index.bitfields.instance_broadcast_writes);
748 pr_debug("\t\t ii is : %u\n",
749 reg_gfx_index.bitfields.instance_index);
750 pr_debug("\t\t sebw is : %u\n",
751 reg_gfx_index.bitfields.se_broadcast_writes);
752 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
753 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
754 pr_debug("\t\t sbw is : %u\n",
755 reg_gfx_index.bitfields.sh_broadcast_writes);
756
757 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
758
759 return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
760 reg_gfx_index.u32All,
761 reg_sq_cmd.u32All);
762}
763
764int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
765{
766 int status = 0;
767 unsigned int vmid;
768 union SQ_CMD_BITS reg_sq_cmd;
769 union GRBM_GFX_INDEX_BITS reg_gfx_index;
770 struct kfd_process_device *pdd;
771 struct dbg_wave_control_info wac_info;
772 int temp;
773 int first_vmid_to_scan = 8;
774 int last_vmid_to_scan = 15;
775
776 first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
777 temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
778 last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
779
780 reg_sq_cmd.u32All = 0;
781 status = 0;
782
783 wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
784 wac_info.operand = HSA_DBG_WAVEOP_KILL;
785
786 pr_debug("Killing all process wavefronts\n");
787
788
789
790
791
792
793 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
794 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
795 (dev->kgd, vmid)) {
796 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
797 (dev->kgd, vmid) == p->pasid) {
798 pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
799 vmid, p->pasid);
800 break;
801 }
802 }
803 }
804
805 if (vmid > last_vmid_to_scan) {
806 pr_err("Didn't find vmid for pasid %d\n", p->pasid);
807 return -EFAULT;
808 }
809
810
811 pdd = kfd_get_process_device_data(dev, p);
812 if (!pdd)
813 return -EFAULT;
814
815 status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd,
816 ®_gfx_index);
817 if (status != 0)
818 return -EINVAL;
819
820
821 reg_sq_cmd.bits.vm_id = vmid;
822
823 dev->kfd2kgd->wave_control_execute(dev->kgd,
824 reg_gfx_index.u32All,
825 reg_sq_cmd.u32All);
826
827 return 0;
828}
829
830void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
831 enum DBGDEV_TYPE type)
832{
833 pdbgdev->dev = pdev;
834 pdbgdev->kq = NULL;
835 pdbgdev->type = type;
836 pdbgdev->pqm = NULL;
837
838 switch (type) {
839 case DBGDEV_TYPE_NODIQ:
840 pdbgdev->dbgdev_register = dbgdev_register_nodiq;
841 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
842 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
843 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
844 break;
845 case DBGDEV_TYPE_DIQ:
846 default:
847 pdbgdev->dbgdev_register = dbgdev_register_diq;
848 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
849 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
850 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
851 break;
852 }
853
854}
855