linux/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/types.h>
  25#include <linux/kernel.h>
  26#include <linux/log2.h>
  27#include <linux/sched.h>
  28#include <linux/slab.h>
  29#include <linux/mutex.h>
  30#include <linux/device.h>
  31
  32#include "kfd_pm4_headers.h"
  33#include "kfd_pm4_headers_diq.h"
  34#include "kfd_kernel_queue.h"
  35#include "kfd_priv.h"
  36#include "kfd_pm4_opcodes.h"
  37#include "cik_regs.h"
  38#include "kfd_dbgmgr.h"
  39#include "kfd_dbgdev.h"
  40#include "kfd_device_queue_manager.h"
  41
  42static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
  43{
  44        dev->kfd2kgd->address_watch_disable(dev->kgd);
  45}
  46
  47static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
  48                                u32 pasid, uint64_t vmid0_address,
  49                                uint32_t *packet_buff, size_t size_in_bytes)
  50{
  51        struct pm4__release_mem *rm_packet;
  52        struct pm4__indirect_buffer_pasid *ib_packet;
  53        struct kfd_mem_obj *mem_obj;
  54        size_t pq_packets_size_in_bytes;
  55        union ULARGE_INTEGER *largep;
  56        union ULARGE_INTEGER addr;
  57        struct kernel_queue *kq;
  58        uint64_t *rm_state;
  59        unsigned int *ib_packet_buff;
  60        int status;
  61
  62        if (WARN_ON(!size_in_bytes))
  63                return -EINVAL;
  64
  65        kq = dbgdev->kq;
  66
  67        pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
  68                                sizeof(struct pm4__indirect_buffer_pasid);
  69
  70        /*
  71         * We acquire a buffer from DIQ
  72         * The receive packet buff will be sitting on the Indirect Buffer
  73         * and in the PQ we put the IB packet + sync packet(s).
  74         */
  75        status = kq_acquire_packet_buffer(kq,
  76                                pq_packets_size_in_bytes / sizeof(uint32_t),
  77                                &ib_packet_buff);
  78        if (status) {
  79                pr_err("kq_acquire_packet_buffer failed\n");
  80                return status;
  81        }
  82
  83        memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
  84
  85        ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
  86
  87        ib_packet->header.count = 3;
  88        ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
  89        ib_packet->header.type = PM4_TYPE_3;
  90
  91        largep = (union ULARGE_INTEGER *) &vmid0_address;
  92
  93        ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
  94        ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
  95
  96        ib_packet->control = (1 << 23) | (1 << 31) |
  97                        ((size_in_bytes / 4) & 0xfffff);
  98
  99        ib_packet->bitfields5.pasid = pasid;
 100
 101        /*
 102         * for now we use release mem for GPU-CPU synchronization
 103         * Consider WaitRegMem + WriteData as a better alternative
 104         * we get a GART allocations ( gpu/cpu mapping),
 105         * for the sync variable, and wait until:
 106         * (a) Sync with HW
 107         * (b) Sync var is written by CP to mem.
 108         */
 109        rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
 110                        (sizeof(struct pm4__indirect_buffer_pasid) /
 111                                        sizeof(unsigned int)));
 112
 113        status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
 114                                        &mem_obj);
 115
 116        if (status) {
 117                pr_err("Failed to allocate GART memory\n");
 118                kq_rollback_packet(kq);
 119                return status;
 120        }
 121
 122        rm_state = (uint64_t *) mem_obj->cpu_ptr;
 123
 124        *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
 125
 126        rm_packet->header.opcode = IT_RELEASE_MEM;
 127        rm_packet->header.type = PM4_TYPE_3;
 128        rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
 129
 130        rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
 131        rm_packet->bitfields2.event_index =
 132                                event_index___release_mem__end_of_pipe;
 133
 134        rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
 135        rm_packet->bitfields2.atc = 0;
 136        rm_packet->bitfields2.tc_wb_action_ena = 1;
 137
 138        addr.quad_part = mem_obj->gpu_addr;
 139
 140        rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
 141        rm_packet->address_hi = addr.u.high_part;
 142
 143        rm_packet->bitfields3.data_sel =
 144                                data_sel___release_mem__send_64_bit_data;
 145
 146        rm_packet->bitfields3.int_sel =
 147                        int_sel___release_mem__send_data_after_write_confirm;
 148
 149        rm_packet->bitfields3.dst_sel =
 150                        dst_sel___release_mem__memory_controller;
 151
 152        rm_packet->data_lo = QUEUESTATE__ACTIVE;
 153
 154        kq_submit_packet(kq);
 155
 156        /* Wait till CP writes sync code: */
 157        status = amdkfd_fence_wait_timeout(
 158                        rm_state,
 159                        QUEUESTATE__ACTIVE, 1500);
 160
 161        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 162
 163        return status;
 164}
 165
 166static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
 167{
 168        /*
 169         * no action is needed in this case,
 170         * just make sure diq will not be used
 171         */
 172
 173        dbgdev->kq = NULL;
 174
 175        return 0;
 176}
 177
 178static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
 179{
 180        struct queue_properties properties;
 181        unsigned int qid;
 182        struct kernel_queue *kq = NULL;
 183        int status;
 184
 185        properties.type = KFD_QUEUE_TYPE_DIQ;
 186
 187        status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
 188                                &properties, &qid, NULL);
 189
 190        if (status) {
 191                pr_err("Failed to create DIQ\n");
 192                return status;
 193        }
 194
 195        pr_debug("DIQ Created with queue id: %d\n", qid);
 196
 197        kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
 198
 199        if (!kq) {
 200                pr_err("Error getting DIQ\n");
 201                pqm_destroy_queue(dbgdev->pqm, qid);
 202                return -EFAULT;
 203        }
 204
 205        dbgdev->kq = kq;
 206
 207        return status;
 208}
 209
 210static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
 211{
 212        /* disable watch address */
 213        dbgdev_address_watch_disable_nodiq(dbgdev->dev);
 214        return 0;
 215}
 216
 217static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
 218{
 219        /* todo - disable address watch */
 220        int status;
 221
 222        status = pqm_destroy_queue(dbgdev->pqm,
 223                        dbgdev->kq->queue->properties.queue_id);
 224        dbgdev->kq = NULL;
 225
 226        return status;
 227}
 228
 229static void dbgdev_address_watch_set_registers(
 230                        const struct dbg_address_watch_info *adw_info,
 231                        union TCP_WATCH_ADDR_H_BITS *addrHi,
 232                        union TCP_WATCH_ADDR_L_BITS *addrLo,
 233                        union TCP_WATCH_CNTL_BITS *cntl,
 234                        unsigned int index, unsigned int vmid)
 235{
 236        union ULARGE_INTEGER addr;
 237
 238        addr.quad_part = 0;
 239        addrHi->u32All = 0;
 240        addrLo->u32All = 0;
 241        cntl->u32All = 0;
 242
 243        if (adw_info->watch_mask)
 244                cntl->bitfields.mask =
 245                        (uint32_t) (adw_info->watch_mask[index] &
 246                                        ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
 247        else
 248                cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
 249
 250        addr.quad_part = (unsigned long long) adw_info->watch_address[index];
 251
 252        addrHi->bitfields.addr = addr.u.high_part &
 253                                        ADDRESS_WATCH_REG_ADDHIGH_MASK;
 254        addrLo->bitfields.addr =
 255                        (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
 256
 257        cntl->bitfields.mode = adw_info->watch_mode[index];
 258        cntl->bitfields.vmid = (uint32_t) vmid;
 259        /* for now assume it is an ATC address */
 260        cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
 261
 262        pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
 263        pr_debug("\t\t%20s %08x\n", "set reg add high :",
 264                        addrHi->bitfields.addr);
 265        pr_debug("\t\t%20s %08x\n", "set reg add low :",
 266                        addrLo->bitfields.addr);
 267}
 268
 269static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
 270                                      struct dbg_address_watch_info *adw_info)
 271{
 272        union TCP_WATCH_ADDR_H_BITS addrHi;
 273        union TCP_WATCH_ADDR_L_BITS addrLo;
 274        union TCP_WATCH_CNTL_BITS cntl;
 275        struct kfd_process_device *pdd;
 276        unsigned int i;
 277
 278        /* taking the vmid for that process on the safe way using pdd */
 279        pdd = kfd_get_process_device_data(dbgdev->dev,
 280                                        adw_info->process);
 281        if (!pdd) {
 282                pr_err("Failed to get pdd for wave control no DIQ\n");
 283                return -EFAULT;
 284        }
 285
 286        addrHi.u32All = 0;
 287        addrLo.u32All = 0;
 288        cntl.u32All = 0;
 289
 290        if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
 291                        (adw_info->num_watch_points == 0)) {
 292                pr_err("num_watch_points is invalid\n");
 293                return -EINVAL;
 294        }
 295
 296        if (!adw_info->watch_mode || !adw_info->watch_address) {
 297                pr_err("adw_info fields are not valid\n");
 298                return -EINVAL;
 299        }
 300
 301        for (i = 0; i < adw_info->num_watch_points; i++) {
 302                dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
 303                                                &cntl, i, pdd->qpd.vmid);
 304
 305                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 306                pr_debug("\t\t%20s %08x\n", "register index :", i);
 307                pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
 308                pr_debug("\t\t%20s %08x\n", "Address Low is :",
 309                                addrLo.bitfields.addr);
 310                pr_debug("\t\t%20s %08x\n", "Address high is :",
 311                                addrHi.bitfields.addr);
 312                pr_debug("\t\t%20s %08x\n", "Address high is :",
 313                                addrHi.bitfields.addr);
 314                pr_debug("\t\t%20s %08x\n", "Control Mask is :",
 315                                cntl.bitfields.mask);
 316                pr_debug("\t\t%20s %08x\n", "Control Mode is :",
 317                                cntl.bitfields.mode);
 318                pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
 319                                cntl.bitfields.vmid);
 320                pr_debug("\t\t%20s %08x\n", "Control atc  is :",
 321                                cntl.bitfields.atc);
 322                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 323
 324                pdd->dev->kfd2kgd->address_watch_execute(
 325                                                dbgdev->dev->kgd,
 326                                                i,
 327                                                cntl.u32All,
 328                                                addrHi.u32All,
 329                                                addrLo.u32All);
 330        }
 331
 332        return 0;
 333}
 334
 335static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 336                                    struct dbg_address_watch_info *adw_info)
 337{
 338        struct pm4__set_config_reg *packets_vec;
 339        union TCP_WATCH_ADDR_H_BITS addrHi;
 340        union TCP_WATCH_ADDR_L_BITS addrLo;
 341        union TCP_WATCH_CNTL_BITS cntl;
 342        struct kfd_mem_obj *mem_obj;
 343        unsigned int aw_reg_add_dword;
 344        uint32_t *packet_buff_uint;
 345        unsigned int i;
 346        int status;
 347        size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
 348        /* we do not control the vmid in DIQ mode, just a place holder */
 349        unsigned int vmid = 0;
 350
 351        addrHi.u32All = 0;
 352        addrLo.u32All = 0;
 353        cntl.u32All = 0;
 354
 355        if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
 356                        (adw_info->num_watch_points == 0)) {
 357                pr_err("num_watch_points is invalid\n");
 358                return -EINVAL;
 359        }
 360
 361        if (!adw_info->watch_mode || !adw_info->watch_address) {
 362                pr_err("adw_info fields are not valid\n");
 363                return -EINVAL;
 364        }
 365
 366        status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
 367
 368        if (status) {
 369                pr_err("Failed to allocate GART memory\n");
 370                return status;
 371        }
 372
 373        packet_buff_uint = mem_obj->cpu_ptr;
 374
 375        memset(packet_buff_uint, 0, ib_size);
 376
 377        packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
 378
 379        packets_vec[0].header.count = 1;
 380        packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
 381        packets_vec[0].header.type = PM4_TYPE_3;
 382        packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
 383        packets_vec[0].bitfields2.insert_vmid = 1;
 384        packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
 385        packets_vec[1].bitfields2.insert_vmid = 0;
 386        packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
 387        packets_vec[2].bitfields2.insert_vmid = 0;
 388        packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
 389        packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
 390        packets_vec[3].bitfields2.insert_vmid = 1;
 391
 392        for (i = 0; i < adw_info->num_watch_points; i++) {
 393                dbgdev_address_watch_set_registers(adw_info,
 394                                                &addrHi,
 395                                                &addrLo,
 396                                                &cntl,
 397                                                i,
 398                                                vmid);
 399
 400                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 401                pr_debug("\t\t%20s %08x\n", "register index :", i);
 402                pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
 403                pr_debug("\t\t%20s %p\n", "Add ptr is :",
 404                                adw_info->watch_address);
 405                pr_debug("\t\t%20s %08llx\n", "Add     is :",
 406                                adw_info->watch_address[i]);
 407                pr_debug("\t\t%20s %08x\n", "Address Low is :",
 408                                addrLo.bitfields.addr);
 409                pr_debug("\t\t%20s %08x\n", "Address high is :",
 410                                addrHi.bitfields.addr);
 411                pr_debug("\t\t%20s %08x\n", "Control Mask is :",
 412                                cntl.bitfields.mask);
 413                pr_debug("\t\t%20s %08x\n", "Control Mode is :",
 414                                cntl.bitfields.mode);
 415                pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
 416                                cntl.bitfields.vmid);
 417                pr_debug("\t\t%20s %08x\n", "Control atc  is :",
 418                                cntl.bitfields.atc);
 419                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 420
 421                aw_reg_add_dword =
 422                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 423                                        dbgdev->dev->kgd,
 424                                        i,
 425                                        ADDRESS_WATCH_REG_CNTL);
 426
 427                packets_vec[0].bitfields2.reg_offset =
 428                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 429
 430                packets_vec[0].reg_data[0] = cntl.u32All;
 431
 432                aw_reg_add_dword =
 433                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 434                                        dbgdev->dev->kgd,
 435                                        i,
 436                                        ADDRESS_WATCH_REG_ADDR_HI);
 437
 438                packets_vec[1].bitfields2.reg_offset =
 439                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 440                packets_vec[1].reg_data[0] = addrHi.u32All;
 441
 442                aw_reg_add_dword =
 443                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 444                                        dbgdev->dev->kgd,
 445                                        i,
 446                                        ADDRESS_WATCH_REG_ADDR_LO);
 447
 448                packets_vec[2].bitfields2.reg_offset =
 449                                aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 450                packets_vec[2].reg_data[0] = addrLo.u32All;
 451
 452                /* enable watch flag if address is not zero*/
 453                if (adw_info->watch_address[i] > 0)
 454                        cntl.bitfields.valid = 1;
 455                else
 456                        cntl.bitfields.valid = 0;
 457
 458                aw_reg_add_dword =
 459                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 460                                        dbgdev->dev->kgd,
 461                                        i,
 462                                        ADDRESS_WATCH_REG_CNTL);
 463
 464                packets_vec[3].bitfields2.reg_offset =
 465                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 466                packets_vec[3].reg_data[0] = cntl.u32All;
 467
 468                status = dbgdev_diq_submit_ib(
 469                                        dbgdev,
 470                                        adw_info->process->pasid,
 471                                        mem_obj->gpu_addr,
 472                                        packet_buff_uint,
 473                                        ib_size);
 474
 475                if (status) {
 476                        pr_err("Failed to submit IB to DIQ\n");
 477                        break;
 478                }
 479        }
 480
 481        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 482        return status;
 483}
 484
 485static int dbgdev_wave_control_set_registers(
 486                                struct dbg_wave_control_info *wac_info,
 487                                union SQ_CMD_BITS *in_reg_sq_cmd,
 488                                union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
 489{
 490        int status = 0;
 491        union SQ_CMD_BITS reg_sq_cmd;
 492        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 493        struct HsaDbgWaveMsgAMDGen2 *pMsg;
 494
 495        reg_sq_cmd.u32All = 0;
 496        reg_gfx_index.u32All = 0;
 497        pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
 498
 499        switch (wac_info->mode) {
 500        /* Send command to single wave */
 501        case HSA_DBG_WAVEMODE_SINGLE:
 502                /*
 503                 * Limit access to the process waves only,
 504                 * by setting vmid check
 505                 */
 506                reg_sq_cmd.bits.check_vmid = 1;
 507                reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
 508                reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
 509                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
 510
 511                reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
 512                reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
 513                reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
 514
 515                break;
 516
 517        /* Send command to all waves with matching VMID */
 518        case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
 519
 520                reg_gfx_index.bits.sh_broadcast_writes = 1;
 521                reg_gfx_index.bits.se_broadcast_writes = 1;
 522                reg_gfx_index.bits.instance_broadcast_writes = 1;
 523
 524                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
 525
 526                break;
 527
 528        /* Send command to all CU waves with matching VMID */
 529        case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
 530
 531                reg_sq_cmd.bits.check_vmid = 1;
 532                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
 533
 534                reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
 535                reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
 536                reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
 537
 538                break;
 539
 540        default:
 541                return -EINVAL;
 542        }
 543
 544        switch (wac_info->operand) {
 545        case HSA_DBG_WAVEOP_HALT:
 546                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
 547                break;
 548
 549        case HSA_DBG_WAVEOP_RESUME:
 550                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
 551                break;
 552
 553        case HSA_DBG_WAVEOP_KILL:
 554                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
 555                break;
 556
 557        case HSA_DBG_WAVEOP_DEBUG:
 558                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
 559                break;
 560
 561        case HSA_DBG_WAVEOP_TRAP:
 562                if (wac_info->trapId < MAX_TRAPID) {
 563                        reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
 564                        reg_sq_cmd.bits.trap_id = wac_info->trapId;
 565                } else {
 566                        status = -EINVAL;
 567                }
 568                break;
 569
 570        default:
 571                status = -EINVAL;
 572                break;
 573        }
 574
 575        if (status == 0) {
 576                *in_reg_sq_cmd = reg_sq_cmd;
 577                *in_reg_gfx_index = reg_gfx_index;
 578        }
 579
 580        return status;
 581}
 582
 583static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 584                                        struct dbg_wave_control_info *wac_info)
 585{
 586
 587        int status;
 588        union SQ_CMD_BITS reg_sq_cmd;
 589        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 590        struct kfd_mem_obj *mem_obj;
 591        uint32_t *packet_buff_uint;
 592        struct pm4__set_config_reg *packets_vec;
 593        size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
 594
 595        reg_sq_cmd.u32All = 0;
 596
 597        status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
 598                                                        &reg_gfx_index);
 599        if (status) {
 600                pr_err("Failed to set wave control registers\n");
 601                return status;
 602        }
 603
 604        /* we do not control the VMID in DIQ, so reset it to a known value */
 605        reg_sq_cmd.bits.vm_id = 0;
 606
 607        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 608
 609        pr_debug("\t\t mode      is: %u\n", wac_info->mode);
 610        pr_debug("\t\t operand   is: %u\n", wac_info->operand);
 611        pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
 612        pr_debug("\t\t msg value is: %u\n",
 613                        wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 614        pr_debug("\t\t vmid      is: N/A\n");
 615
 616        pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
 617        pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
 618        pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
 619        pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
 620        pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
 621        pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
 622        pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
 623
 624        pr_debug("\t\t ibw       is : %u\n",
 625                        reg_gfx_index.bitfields.instance_broadcast_writes);
 626        pr_debug("\t\t ii        is : %u\n",
 627                        reg_gfx_index.bitfields.instance_index);
 628        pr_debug("\t\t sebw      is : %u\n",
 629                        reg_gfx_index.bitfields.se_broadcast_writes);
 630        pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
 631        pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
 632        pr_debug("\t\t sbw       is : %u\n",
 633                        reg_gfx_index.bitfields.sh_broadcast_writes);
 634
 635        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 636
 637        status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
 638
 639        if (status != 0) {
 640                pr_err("Failed to allocate GART memory\n");
 641                return status;
 642        }
 643
 644        packet_buff_uint = mem_obj->cpu_ptr;
 645
 646        memset(packet_buff_uint, 0, ib_size);
 647
 648        packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
 649        packets_vec[0].header.count = 1;
 650        packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
 651        packets_vec[0].header.type = PM4_TYPE_3;
 652        packets_vec[0].bitfields2.reg_offset =
 653                        GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
 654
 655        packets_vec[0].bitfields2.insert_vmid = 0;
 656        packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
 657
 658        packets_vec[1].header.count = 1;
 659        packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
 660        packets_vec[1].header.type = PM4_TYPE_3;
 661        packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
 662
 663        packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
 664        packets_vec[1].bitfields2.insert_vmid = 1;
 665        packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
 666
 667        /* Restore the GRBM_GFX_INDEX register */
 668
 669        reg_gfx_index.u32All = 0;
 670        reg_gfx_index.bits.sh_broadcast_writes = 1;
 671        reg_gfx_index.bits.instance_broadcast_writes = 1;
 672        reg_gfx_index.bits.se_broadcast_writes = 1;
 673
 674
 675        packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
 676        packets_vec[2].bitfields2.reg_offset =
 677                                GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
 678
 679        packets_vec[2].bitfields2.insert_vmid = 0;
 680        packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
 681
 682        status = dbgdev_diq_submit_ib(
 683                        dbgdev,
 684                        wac_info->process->pasid,
 685                        mem_obj->gpu_addr,
 686                        packet_buff_uint,
 687                        ib_size);
 688
 689        if (status)
 690                pr_err("Failed to submit IB to DIQ\n");
 691
 692        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 693
 694        return status;
 695}
 696
 697static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
 698                                        struct dbg_wave_control_info *wac_info)
 699{
 700        int status;
 701        union SQ_CMD_BITS reg_sq_cmd;
 702        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 703        struct kfd_process_device *pdd;
 704
 705        reg_sq_cmd.u32All = 0;
 706
 707        /* taking the VMID for that process on the safe way using PDD */
 708        pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
 709
 710        if (!pdd) {
 711                pr_err("Failed to get pdd for wave control no DIQ\n");
 712                return -EFAULT;
 713        }
 714        status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
 715                                                        &reg_gfx_index);
 716        if (status) {
 717                pr_err("Failed to set wave control registers\n");
 718                return status;
 719        }
 720
 721        /* for non DIQ we need to patch the VMID: */
 722
 723        reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
 724
 725        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 726
 727        pr_debug("\t\t mode      is: %u\n", wac_info->mode);
 728        pr_debug("\t\t operand   is: %u\n", wac_info->operand);
 729        pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
 730        pr_debug("\t\t msg value is: %u\n",
 731                        wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 732        pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
 733
 734        pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
 735        pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
 736        pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
 737        pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
 738        pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
 739        pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
 740        pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
 741
 742        pr_debug("\t\t ibw       is : %u\n",
 743                        reg_gfx_index.bitfields.instance_broadcast_writes);
 744        pr_debug("\t\t ii        is : %u\n",
 745                        reg_gfx_index.bitfields.instance_index);
 746        pr_debug("\t\t sebw      is : %u\n",
 747                        reg_gfx_index.bitfields.se_broadcast_writes);
 748        pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
 749        pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
 750        pr_debug("\t\t sbw       is : %u\n",
 751                        reg_gfx_index.bitfields.sh_broadcast_writes);
 752
 753        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 754
 755        return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
 756                                                        reg_gfx_index.u32All,
 757                                                        reg_sq_cmd.u32All);
 758}
 759
 760int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 761{
 762        int status = 0;
 763        unsigned int vmid;
 764        uint16_t queried_pasid;
 765        union SQ_CMD_BITS reg_sq_cmd;
 766        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 767        struct kfd_process_device *pdd;
 768        struct dbg_wave_control_info wac_info;
 769        int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
 770        int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
 771
 772        reg_sq_cmd.u32All = 0;
 773        status = 0;
 774
 775        wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
 776        wac_info.operand = HSA_DBG_WAVEOP_KILL;
 777
 778        pr_debug("Killing all process wavefronts\n");
 779
 780        /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
 781         * ATC_VMID15_PASID_MAPPING
 782         * to check which VMID the current process is mapped to.
 783         */
 784
 785        for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
 786                status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
 787                                (dev->kgd, vmid, &queried_pasid);
 788
 789                if (status && queried_pasid == p->pasid) {
 790                        pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
 791                                        vmid, p->pasid);
 792                        break;
 793                }
 794        }
 795
 796        if (vmid > last_vmid_to_scan) {
 797                pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
 798                return -EFAULT;
 799        }
 800
 801        /* taking the VMID for that process on the safe way using PDD */
 802        pdd = kfd_get_process_device_data(dev, p);
 803        if (!pdd)
 804                return -EFAULT;
 805
 806        status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
 807                        &reg_gfx_index);
 808        if (status != 0)
 809                return -EINVAL;
 810
 811        /* for non DIQ we need to patch the VMID: */
 812        reg_sq_cmd.bits.vm_id = vmid;
 813
 814        dev->kfd2kgd->wave_control_execute(dev->kgd,
 815                                        reg_gfx_index.u32All,
 816                                        reg_sq_cmd.u32All);
 817
 818        return 0;
 819}
 820
 821void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
 822                        enum DBGDEV_TYPE type)
 823{
 824        pdbgdev->dev = pdev;
 825        pdbgdev->kq = NULL;
 826        pdbgdev->type = type;
 827        pdbgdev->pqm = NULL;
 828
 829        switch (type) {
 830        case DBGDEV_TYPE_NODIQ:
 831                pdbgdev->dbgdev_register = dbgdev_register_nodiq;
 832                pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
 833                pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
 834                pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
 835                break;
 836        case DBGDEV_TYPE_DIQ:
 837        default:
 838                pdbgdev->dbgdev_register = dbgdev_register_diq;
 839                pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
 840                pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
 841                pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
 842                break;
 843        }
 844
 845}
 846