linux/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/types.h>
  25#include <linux/kernel.h>
  26#include <linux/log2.h>
  27#include <linux/sched.h>
  28#include <linux/slab.h>
  29#include <linux/mutex.h>
  30#include <linux/device.h>
  31
  32#include "kfd_pm4_headers.h"
  33#include "kfd_pm4_headers_diq.h"
  34#include "kfd_kernel_queue.h"
  35#include "kfd_priv.h"
  36#include "kfd_pm4_opcodes.h"
  37#include "cik_regs.h"
  38#include "kfd_dbgmgr.h"
  39#include "kfd_dbgdev.h"
  40#include "kfd_device_queue_manager.h"
  41#include "../../radeon/cik_reg.h"
  42
  43static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
  44{
  45        dev->kfd2kgd->address_watch_disable(dev->kgd);
  46}
  47
  48static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
  49                                unsigned int pasid, uint64_t vmid0_address,
  50                                uint32_t *packet_buff, size_t size_in_bytes)
  51{
  52        struct pm4__release_mem *rm_packet;
  53        struct pm4__indirect_buffer_pasid *ib_packet;
  54        struct kfd_mem_obj *mem_obj;
  55        size_t pq_packets_size_in_bytes;
  56        union ULARGE_INTEGER *largep;
  57        union ULARGE_INTEGER addr;
  58        struct kernel_queue *kq;
  59        uint64_t *rm_state;
  60        unsigned int *ib_packet_buff;
  61        int status;
  62
  63        if (WARN_ON(!size_in_bytes))
  64                return -EINVAL;
  65
  66        kq = dbgdev->kq;
  67
  68        pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
  69                                sizeof(struct pm4__indirect_buffer_pasid);
  70
  71        /*
  72         * We acquire a buffer from DIQ
  73         * The receive packet buff will be sitting on the Indirect Buffer
  74         * and in the PQ we put the IB packet + sync packet(s).
  75         */
  76        status = kq->ops.acquire_packet_buffer(kq,
  77                                pq_packets_size_in_bytes / sizeof(uint32_t),
  78                                &ib_packet_buff);
  79        if (status) {
  80                pr_err("acquire_packet_buffer failed\n");
  81                return status;
  82        }
  83
  84        memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
  85
  86        ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
  87
  88        ib_packet->header.count = 3;
  89        ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
  90        ib_packet->header.type = PM4_TYPE_3;
  91
  92        largep = (union ULARGE_INTEGER *) &vmid0_address;
  93
  94        ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
  95        ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
  96
  97        ib_packet->control = (1 << 23) | (1 << 31) |
  98                        ((size_in_bytes / 4) & 0xfffff);
  99
 100        ib_packet->bitfields5.pasid = pasid;
 101
 102        /*
 103         * for now we use release mem for GPU-CPU synchronization
 104         * Consider WaitRegMem + WriteData as a better alternative
 105         * we get a GART allocations ( gpu/cpu mapping),
 106         * for the sync variable, and wait until:
 107         * (a) Sync with HW
 108         * (b) Sync var is written by CP to mem.
 109         */
 110        rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
 111                        (sizeof(struct pm4__indirect_buffer_pasid) /
 112                                        sizeof(unsigned int)));
 113
 114        status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
 115                                        &mem_obj);
 116
 117        if (status) {
 118                pr_err("Failed to allocate GART memory\n");
 119                kq->ops.rollback_packet(kq);
 120                return status;
 121        }
 122
 123        rm_state = (uint64_t *) mem_obj->cpu_ptr;
 124
 125        *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
 126
 127        rm_packet->header.opcode = IT_RELEASE_MEM;
 128        rm_packet->header.type = PM4_TYPE_3;
 129        rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
 130
 131        rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
 132        rm_packet->bitfields2.event_index =
 133                                event_index___release_mem__end_of_pipe;
 134
 135        rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
 136        rm_packet->bitfields2.atc = 0;
 137        rm_packet->bitfields2.tc_wb_action_ena = 1;
 138
 139        addr.quad_part = mem_obj->gpu_addr;
 140
 141        rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
 142        rm_packet->address_hi = addr.u.high_part;
 143
 144        rm_packet->bitfields3.data_sel =
 145                                data_sel___release_mem__send_64_bit_data;
 146
 147        rm_packet->bitfields3.int_sel =
 148                        int_sel___release_mem__send_data_after_write_confirm;
 149
 150        rm_packet->bitfields3.dst_sel =
 151                        dst_sel___release_mem__memory_controller;
 152
 153        rm_packet->data_lo = QUEUESTATE__ACTIVE;
 154
 155        kq->ops.submit_packet(kq);
 156
 157        /* Wait till CP writes sync code: */
 158        status = amdkfd_fence_wait_timeout(
 159                        (unsigned int *) rm_state,
 160                        QUEUESTATE__ACTIVE, 1500);
 161
 162        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 163
 164        return status;
 165}
 166
 167static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
 168{
 169        /*
 170         * no action is needed in this case,
 171         * just make sure diq will not be used
 172         */
 173
 174        dbgdev->kq = NULL;
 175
 176        return 0;
 177}
 178
 179static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
 180{
 181        struct queue_properties properties;
 182        unsigned int qid;
 183        struct kernel_queue *kq = NULL;
 184        int status;
 185
 186        properties.type = KFD_QUEUE_TYPE_DIQ;
 187
 188        status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
 189                                &properties, &qid);
 190
 191        if (status) {
 192                pr_err("Failed to create DIQ\n");
 193                return status;
 194        }
 195
 196        pr_debug("DIQ Created with queue id: %d\n", qid);
 197
 198        kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
 199
 200        if (!kq) {
 201                pr_err("Error getting DIQ\n");
 202                pqm_destroy_queue(dbgdev->pqm, qid);
 203                return -EFAULT;
 204        }
 205
 206        dbgdev->kq = kq;
 207
 208        return status;
 209}
 210
 211static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
 212{
 213        /* disable watch address */
 214        dbgdev_address_watch_disable_nodiq(dbgdev->dev);
 215        return 0;
 216}
 217
 218static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
 219{
 220        /* todo - disable address watch */
 221        int status;
 222
 223        status = pqm_destroy_queue(dbgdev->pqm,
 224                        dbgdev->kq->queue->properties.queue_id);
 225        dbgdev->kq = NULL;
 226
 227        return status;
 228}
 229
 230static void dbgdev_address_watch_set_registers(
 231                        const struct dbg_address_watch_info *adw_info,
 232                        union TCP_WATCH_ADDR_H_BITS *addrHi,
 233                        union TCP_WATCH_ADDR_L_BITS *addrLo,
 234                        union TCP_WATCH_CNTL_BITS *cntl,
 235                        unsigned int index, unsigned int vmid)
 236{
 237        union ULARGE_INTEGER addr;
 238
 239        addr.quad_part = 0;
 240        addrHi->u32All = 0;
 241        addrLo->u32All = 0;
 242        cntl->u32All = 0;
 243
 244        if (adw_info->watch_mask)
 245                cntl->bitfields.mask =
 246                        (uint32_t) (adw_info->watch_mask[index] &
 247                                        ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
 248        else
 249                cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
 250
 251        addr.quad_part = (unsigned long long) adw_info->watch_address[index];
 252
 253        addrHi->bitfields.addr = addr.u.high_part &
 254                                        ADDRESS_WATCH_REG_ADDHIGH_MASK;
 255        addrLo->bitfields.addr =
 256                        (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
 257
 258        cntl->bitfields.mode = adw_info->watch_mode[index];
 259        cntl->bitfields.vmid = (uint32_t) vmid;
 260        /* for now assume it is an ATC address */
 261        cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
 262
 263        pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
 264        pr_debug("\t\t%20s %08x\n", "set reg add high :",
 265                        addrHi->bitfields.addr);
 266        pr_debug("\t\t%20s %08x\n", "set reg add low :",
 267                        addrLo->bitfields.addr);
 268}
 269
 270static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
 271                                      struct dbg_address_watch_info *adw_info)
 272{
 273        union TCP_WATCH_ADDR_H_BITS addrHi;
 274        union TCP_WATCH_ADDR_L_BITS addrLo;
 275        union TCP_WATCH_CNTL_BITS cntl;
 276        struct kfd_process_device *pdd;
 277        unsigned int i;
 278
 279        /* taking the vmid for that process on the safe way using pdd */
 280        pdd = kfd_get_process_device_data(dbgdev->dev,
 281                                        adw_info->process);
 282        if (!pdd) {
 283                pr_err("Failed to get pdd for wave control no DIQ\n");
 284                return -EFAULT;
 285        }
 286
 287        addrHi.u32All = 0;
 288        addrLo.u32All = 0;
 289        cntl.u32All = 0;
 290
 291        if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
 292                        (adw_info->num_watch_points == 0)) {
 293                pr_err("num_watch_points is invalid\n");
 294                return -EINVAL;
 295        }
 296
 297        if (!adw_info->watch_mode || !adw_info->watch_address) {
 298                pr_err("adw_info fields are not valid\n");
 299                return -EINVAL;
 300        }
 301
 302        for (i = 0; i < adw_info->num_watch_points; i++) {
 303                dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
 304                                                &cntl, i, pdd->qpd.vmid);
 305
 306                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 307                pr_debug("\t\t%20s %08x\n", "register index :", i);
 308                pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
 309                pr_debug("\t\t%20s %08x\n", "Address Low is :",
 310                                addrLo.bitfields.addr);
 311                pr_debug("\t\t%20s %08x\n", "Address high is :",
 312                                addrHi.bitfields.addr);
 313                pr_debug("\t\t%20s %08x\n", "Address high is :",
 314                                addrHi.bitfields.addr);
 315                pr_debug("\t\t%20s %08x\n", "Control Mask is :",
 316                                cntl.bitfields.mask);
 317                pr_debug("\t\t%20s %08x\n", "Control Mode is :",
 318                                cntl.bitfields.mode);
 319                pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
 320                                cntl.bitfields.vmid);
 321                pr_debug("\t\t%20s %08x\n", "Control atc  is :",
 322                                cntl.bitfields.atc);
 323                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 324
 325                pdd->dev->kfd2kgd->address_watch_execute(
 326                                                dbgdev->dev->kgd,
 327                                                i,
 328                                                cntl.u32All,
 329                                                addrHi.u32All,
 330                                                addrLo.u32All);
 331        }
 332
 333        return 0;
 334}
 335
 336static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 337                                    struct dbg_address_watch_info *adw_info)
 338{
 339        struct pm4__set_config_reg *packets_vec;
 340        union TCP_WATCH_ADDR_H_BITS addrHi;
 341        union TCP_WATCH_ADDR_L_BITS addrLo;
 342        union TCP_WATCH_CNTL_BITS cntl;
 343        struct kfd_mem_obj *mem_obj;
 344        unsigned int aw_reg_add_dword;
 345        uint32_t *packet_buff_uint;
 346        unsigned int i;
 347        int status;
 348        size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
 349        /* we do not control the vmid in DIQ mode, just a place holder */
 350        unsigned int vmid = 0;
 351
 352        addrHi.u32All = 0;
 353        addrLo.u32All = 0;
 354        cntl.u32All = 0;
 355
 356        if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
 357                        (adw_info->num_watch_points == 0)) {
 358                pr_err("num_watch_points is invalid\n");
 359                return -EINVAL;
 360        }
 361
 362        if (!adw_info->watch_mode || !adw_info->watch_address) {
 363                pr_err("adw_info fields are not valid\n");
 364                return -EINVAL;
 365        }
 366
 367        status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
 368
 369        if (status) {
 370                pr_err("Failed to allocate GART memory\n");
 371                return status;
 372        }
 373
 374        packet_buff_uint = mem_obj->cpu_ptr;
 375
 376        memset(packet_buff_uint, 0, ib_size);
 377
 378        packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
 379
 380        packets_vec[0].header.count = 1;
 381        packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
 382        packets_vec[0].header.type = PM4_TYPE_3;
 383        packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
 384        packets_vec[0].bitfields2.insert_vmid = 1;
 385        packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
 386        packets_vec[1].bitfields2.insert_vmid = 0;
 387        packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
 388        packets_vec[2].bitfields2.insert_vmid = 0;
 389        packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
 390        packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
 391        packets_vec[3].bitfields2.insert_vmid = 1;
 392
 393        for (i = 0; i < adw_info->num_watch_points; i++) {
 394                dbgdev_address_watch_set_registers(adw_info,
 395                                                &addrHi,
 396                                                &addrLo,
 397                                                &cntl,
 398                                                i,
 399                                                vmid);
 400
 401                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 402                pr_debug("\t\t%20s %08x\n", "register index :", i);
 403                pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
 404                pr_debug("\t\t%20s %p\n", "Add ptr is :",
 405                                adw_info->watch_address);
 406                pr_debug("\t\t%20s %08llx\n", "Add     is :",
 407                                adw_info->watch_address[i]);
 408                pr_debug("\t\t%20s %08x\n", "Address Low is :",
 409                                addrLo.bitfields.addr);
 410                pr_debug("\t\t%20s %08x\n", "Address high is :",
 411                                addrHi.bitfields.addr);
 412                pr_debug("\t\t%20s %08x\n", "Control Mask is :",
 413                                cntl.bitfields.mask);
 414                pr_debug("\t\t%20s %08x\n", "Control Mode is :",
 415                                cntl.bitfields.mode);
 416                pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
 417                                cntl.bitfields.vmid);
 418                pr_debug("\t\t%20s %08x\n", "Control atc  is :",
 419                                cntl.bitfields.atc);
 420                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 421
 422                aw_reg_add_dword =
 423                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 424                                        dbgdev->dev->kgd,
 425                                        i,
 426                                        ADDRESS_WATCH_REG_CNTL);
 427
 428                packets_vec[0].bitfields2.reg_offset =
 429                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 430
 431                packets_vec[0].reg_data[0] = cntl.u32All;
 432
 433                aw_reg_add_dword =
 434                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 435                                        dbgdev->dev->kgd,
 436                                        i,
 437                                        ADDRESS_WATCH_REG_ADDR_HI);
 438
 439                packets_vec[1].bitfields2.reg_offset =
 440                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 441                packets_vec[1].reg_data[0] = addrHi.u32All;
 442
 443                aw_reg_add_dword =
 444                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 445                                        dbgdev->dev->kgd,
 446                                        i,
 447                                        ADDRESS_WATCH_REG_ADDR_LO);
 448
 449                packets_vec[2].bitfields2.reg_offset =
 450                                aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 451                packets_vec[2].reg_data[0] = addrLo.u32All;
 452
 453                /* enable watch flag if address is not zero*/
 454                if (adw_info->watch_address[i] > 0)
 455                        cntl.bitfields.valid = 1;
 456                else
 457                        cntl.bitfields.valid = 0;
 458
 459                aw_reg_add_dword =
 460                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 461                                        dbgdev->dev->kgd,
 462                                        i,
 463                                        ADDRESS_WATCH_REG_CNTL);
 464
 465                packets_vec[3].bitfields2.reg_offset =
 466                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 467                packets_vec[3].reg_data[0] = cntl.u32All;
 468
 469                status = dbgdev_diq_submit_ib(
 470                                        dbgdev,
 471                                        adw_info->process->pasid,
 472                                        mem_obj->gpu_addr,
 473                                        packet_buff_uint,
 474                                        ib_size);
 475
 476                if (status) {
 477                        pr_err("Failed to submit IB to DIQ\n");
 478                        break;
 479                }
 480        }
 481
 482        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 483        return status;
 484}
 485
 486static int dbgdev_wave_control_set_registers(
 487                                struct dbg_wave_control_info *wac_info,
 488                                union SQ_CMD_BITS *in_reg_sq_cmd,
 489                                union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
 490{
 491        int status = 0;
 492        union SQ_CMD_BITS reg_sq_cmd;
 493        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 494        struct HsaDbgWaveMsgAMDGen2 *pMsg;
 495
 496        reg_sq_cmd.u32All = 0;
 497        reg_gfx_index.u32All = 0;
 498        pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
 499
 500        switch (wac_info->mode) {
 501        /* Send command to single wave */
 502        case HSA_DBG_WAVEMODE_SINGLE:
 503                /*
 504                 * Limit access to the process waves only,
 505                 * by setting vmid check
 506                 */
 507                reg_sq_cmd.bits.check_vmid = 1;
 508                reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
 509                reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
 510                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
 511
 512                reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
 513                reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
 514                reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
 515
 516                break;
 517
 518        /* Send command to all waves with matching VMID */
 519        case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
 520
 521                reg_gfx_index.bits.sh_broadcast_writes = 1;
 522                reg_gfx_index.bits.se_broadcast_writes = 1;
 523                reg_gfx_index.bits.instance_broadcast_writes = 1;
 524
 525                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
 526
 527                break;
 528
 529        /* Send command to all CU waves with matching VMID */
 530        case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
 531
 532                reg_sq_cmd.bits.check_vmid = 1;
 533                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
 534
 535                reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
 536                reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
 537                reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
 538
 539                break;
 540
 541        default:
 542                return -EINVAL;
 543        }
 544
 545        switch (wac_info->operand) {
 546        case HSA_DBG_WAVEOP_HALT:
 547                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
 548                break;
 549
 550        case HSA_DBG_WAVEOP_RESUME:
 551                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
 552                break;
 553
 554        case HSA_DBG_WAVEOP_KILL:
 555                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
 556                break;
 557
 558        case HSA_DBG_WAVEOP_DEBUG:
 559                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
 560                break;
 561
 562        case HSA_DBG_WAVEOP_TRAP:
 563                if (wac_info->trapId < MAX_TRAPID) {
 564                        reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
 565                        reg_sq_cmd.bits.trap_id = wac_info->trapId;
 566                } else {
 567                        status = -EINVAL;
 568                }
 569                break;
 570
 571        default:
 572                status = -EINVAL;
 573                break;
 574        }
 575
 576        if (status == 0) {
 577                *in_reg_sq_cmd = reg_sq_cmd;
 578                *in_reg_gfx_index = reg_gfx_index;
 579        }
 580
 581        return status;
 582}
 583
 584static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 585                                        struct dbg_wave_control_info *wac_info)
 586{
 587
 588        int status;
 589        union SQ_CMD_BITS reg_sq_cmd;
 590        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 591        struct kfd_mem_obj *mem_obj;
 592        uint32_t *packet_buff_uint;
 593        struct pm4__set_config_reg *packets_vec;
 594        size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
 595
 596        reg_sq_cmd.u32All = 0;
 597
 598        status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
 599                                                        &reg_gfx_index);
 600        if (status) {
 601                pr_err("Failed to set wave control registers\n");
 602                return status;
 603        }
 604
 605        /* we do not control the VMID in DIQ, so reset it to a known value */
 606        reg_sq_cmd.bits.vm_id = 0;
 607
 608        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 609
 610        pr_debug("\t\t mode      is: %u\n", wac_info->mode);
 611        pr_debug("\t\t operand   is: %u\n", wac_info->operand);
 612        pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
 613        pr_debug("\t\t msg value is: %u\n",
 614                        wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 615        pr_debug("\t\t vmid      is: N/A\n");
 616
 617        pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
 618        pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
 619        pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
 620        pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
 621        pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
 622        pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
 623        pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
 624
 625        pr_debug("\t\t ibw       is : %u\n",
 626                        reg_gfx_index.bitfields.instance_broadcast_writes);
 627        pr_debug("\t\t ii        is : %u\n",
 628                        reg_gfx_index.bitfields.instance_index);
 629        pr_debug("\t\t sebw      is : %u\n",
 630                        reg_gfx_index.bitfields.se_broadcast_writes);
 631        pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
 632        pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
 633        pr_debug("\t\t sbw       is : %u\n",
 634                        reg_gfx_index.bitfields.sh_broadcast_writes);
 635
 636        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 637
 638        status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
 639
 640        if (status != 0) {
 641                pr_err("Failed to allocate GART memory\n");
 642                return status;
 643        }
 644
 645        packet_buff_uint = mem_obj->cpu_ptr;
 646
 647        memset(packet_buff_uint, 0, ib_size);
 648
 649        packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
 650        packets_vec[0].header.count = 1;
 651        packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
 652        packets_vec[0].header.type = PM4_TYPE_3;
 653        packets_vec[0].bitfields2.reg_offset =
 654                        GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
 655
 656        packets_vec[0].bitfields2.insert_vmid = 0;
 657        packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
 658
 659        packets_vec[1].header.count = 1;
 660        packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
 661        packets_vec[1].header.type = PM4_TYPE_3;
 662        packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
 663
 664        packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
 665        packets_vec[1].bitfields2.insert_vmid = 1;
 666        packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
 667
 668        /* Restore the GRBM_GFX_INDEX register */
 669
 670        reg_gfx_index.u32All = 0;
 671        reg_gfx_index.bits.sh_broadcast_writes = 1;
 672        reg_gfx_index.bits.instance_broadcast_writes = 1;
 673        reg_gfx_index.bits.se_broadcast_writes = 1;
 674
 675
 676        packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
 677        packets_vec[2].bitfields2.reg_offset =
 678                                GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
 679
 680        packets_vec[2].bitfields2.insert_vmid = 0;
 681        packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
 682
 683        status = dbgdev_diq_submit_ib(
 684                        dbgdev,
 685                        wac_info->process->pasid,
 686                        mem_obj->gpu_addr,
 687                        packet_buff_uint,
 688                        ib_size);
 689
 690        if (status)
 691                pr_err("Failed to submit IB to DIQ\n");
 692
 693        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 694
 695        return status;
 696}
 697
 698static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
 699                                        struct dbg_wave_control_info *wac_info)
 700{
 701        int status;
 702        union SQ_CMD_BITS reg_sq_cmd;
 703        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 704        struct kfd_process_device *pdd;
 705
 706        reg_sq_cmd.u32All = 0;
 707
 708        /* taking the VMID for that process on the safe way using PDD */
 709        pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
 710
 711        if (!pdd) {
 712                pr_err("Failed to get pdd for wave control no DIQ\n");
 713                return -EFAULT;
 714        }
 715        status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
 716                                                        &reg_gfx_index);
 717        if (status) {
 718                pr_err("Failed to set wave control registers\n");
 719                return status;
 720        }
 721
 722        /* for non DIQ we need to patch the VMID: */
 723
 724        reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
 725
 726        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 727
 728        pr_debug("\t\t mode      is: %u\n", wac_info->mode);
 729        pr_debug("\t\t operand   is: %u\n", wac_info->operand);
 730        pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
 731        pr_debug("\t\t msg value is: %u\n",
 732                        wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 733        pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
 734
 735        pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
 736        pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
 737        pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
 738        pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
 739        pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
 740        pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
 741        pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
 742
 743        pr_debug("\t\t ibw       is : %u\n",
 744                        reg_gfx_index.bitfields.instance_broadcast_writes);
 745        pr_debug("\t\t ii        is : %u\n",
 746                        reg_gfx_index.bitfields.instance_index);
 747        pr_debug("\t\t sebw      is : %u\n",
 748                        reg_gfx_index.bitfields.se_broadcast_writes);
 749        pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
 750        pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
 751        pr_debug("\t\t sbw       is : %u\n",
 752                        reg_gfx_index.bitfields.sh_broadcast_writes);
 753
 754        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 755
 756        return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
 757                                                        reg_gfx_index.u32All,
 758                                                        reg_sq_cmd.u32All);
 759}
 760
 761int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 762{
 763        int status = 0;
 764        unsigned int vmid;
 765        union SQ_CMD_BITS reg_sq_cmd;
 766        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 767        struct kfd_process_device *pdd;
 768        struct dbg_wave_control_info wac_info;
 769        int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
 770        int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
 771
 772        reg_sq_cmd.u32All = 0;
 773        status = 0;
 774
 775        wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
 776        wac_info.operand = HSA_DBG_WAVEOP_KILL;
 777
 778        pr_debug("Killing all process wavefronts\n");
 779
 780        /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
 781         * ATC_VMID15_PASID_MAPPING
 782         * to check which VMID the current process is mapped to.
 783         */
 784
 785        for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
 786                if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
 787                                (dev->kgd, vmid)) {
 788                        if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
 789                                        (dev->kgd, vmid) == p->pasid) {
 790                                pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
 791                                                vmid, p->pasid);
 792                                break;
 793                        }
 794                }
 795        }
 796
 797        if (vmid > last_vmid_to_scan) {
 798                pr_err("Didn't find vmid for pasid %d\n", p->pasid);
 799                return -EFAULT;
 800        }
 801
 802        /* taking the VMID for that process on the safe way using PDD */
 803        pdd = kfd_get_process_device_data(dev, p);
 804        if (!pdd)
 805                return -EFAULT;
 806
 807        status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
 808                        &reg_gfx_index);
 809        if (status != 0)
 810                return -EINVAL;
 811
 812        /* for non DIQ we need to patch the VMID: */
 813        reg_sq_cmd.bits.vm_id = vmid;
 814
 815        dev->kfd2kgd->wave_control_execute(dev->kgd,
 816                                        reg_gfx_index.u32All,
 817                                        reg_sq_cmd.u32All);
 818
 819        return 0;
 820}
 821
 822void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
 823                        enum DBGDEV_TYPE type)
 824{
 825        pdbgdev->dev = pdev;
 826        pdbgdev->kq = NULL;
 827        pdbgdev->type = type;
 828        pdbgdev->pqm = NULL;
 829
 830        switch (type) {
 831        case DBGDEV_TYPE_NODIQ:
 832                pdbgdev->dbgdev_register = dbgdev_register_nodiq;
 833                pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
 834                pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
 835                pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
 836                break;
 837        case DBGDEV_TYPE_DIQ:
 838        default:
 839                pdbgdev->dbgdev_register = dbgdev_register_diq;
 840                pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
 841                pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
 842                pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
 843                break;
 844        }
 845
 846}
 847