linux/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/types.h>
  25#include <linux/kernel.h>
  26#include <linux/log2.h>
  27#include <linux/sched.h>
  28#include <linux/slab.h>
  29#include <linux/mutex.h>
  30#include <linux/device.h>
  31
  32#include "kfd_pm4_headers.h"
  33#include "kfd_pm4_headers_diq.h"
  34#include "kfd_kernel_queue.h"
  35#include "kfd_priv.h"
  36#include "kfd_pm4_opcodes.h"
  37#include "cik_regs.h"
  38#include "kfd_dbgmgr.h"
  39#include "kfd_dbgdev.h"
  40#include "kfd_device_queue_manager.h"
  41#include "../../radeon/cik_reg.h"
  42
  43static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
  44{
  45        BUG_ON(!dev || !dev->kfd2kgd);
  46
  47        dev->kfd2kgd->address_watch_disable(dev->kgd);
  48}
  49
  50static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
  51                                unsigned int pasid, uint64_t vmid0_address,
  52                                uint32_t *packet_buff, size_t size_in_bytes)
  53{
  54        struct pm4__release_mem *rm_packet;
  55        struct pm4__indirect_buffer_pasid *ib_packet;
  56        struct kfd_mem_obj *mem_obj;
  57        size_t pq_packets_size_in_bytes;
  58        union ULARGE_INTEGER *largep;
  59        union ULARGE_INTEGER addr;
  60        struct kernel_queue *kq;
  61        uint64_t *rm_state;
  62        unsigned int *ib_packet_buff;
  63        int status;
  64
  65        BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
  66
  67        kq = dbgdev->kq;
  68
  69        pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
  70                                sizeof(struct pm4__indirect_buffer_pasid);
  71
  72        /*
  73         * We acquire a buffer from DIQ
  74         * The receive packet buff will be sitting on the Indirect Buffer
  75         * and in the PQ we put the IB packet + sync packet(s).
  76         */
  77        status = kq->ops.acquire_packet_buffer(kq,
  78                                pq_packets_size_in_bytes / sizeof(uint32_t),
  79                                &ib_packet_buff);
  80        if (status != 0) {
  81                pr_err("amdkfd: acquire_packet_buffer failed\n");
  82                return status;
  83        }
  84
  85        memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
  86
  87        ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
  88
  89        ib_packet->header.count = 3;
  90        ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
  91        ib_packet->header.type = PM4_TYPE_3;
  92
  93        largep = (union ULARGE_INTEGER *) &vmid0_address;
  94
  95        ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
  96        ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
  97
  98        ib_packet->control = (1 << 23) | (1 << 31) |
  99                        ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
 100
 101        ib_packet->bitfields5.pasid = pasid;
 102
 103        /*
 104         * for now we use release mem for GPU-CPU synchronization
 105         * Consider WaitRegMem + WriteData as a better alternative
 106         * we get a GART allocations ( gpu/cpu mapping),
 107         * for the sync variable, and wait until:
 108         * (a) Sync with HW
 109         * (b) Sync var is written by CP to mem.
 110         */
 111        rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
 112                        (sizeof(struct pm4__indirect_buffer_pasid) /
 113                                        sizeof(unsigned int)));
 114
 115        status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
 116                                        &mem_obj);
 117
 118        if (status != 0) {
 119                pr_err("amdkfd: Failed to allocate GART memory\n");
 120                kq->ops.rollback_packet(kq);
 121                return status;
 122        }
 123
 124        rm_state = (uint64_t *) mem_obj->cpu_ptr;
 125
 126        *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
 127
 128        rm_packet->header.opcode = IT_RELEASE_MEM;
 129        rm_packet->header.type = PM4_TYPE_3;
 130        rm_packet->header.count = sizeof(struct pm4__release_mem) /
 131                                        sizeof(unsigned int) - 2;
 132
 133        rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
 134        rm_packet->bitfields2.event_index =
 135                                event_index___release_mem__end_of_pipe;
 136
 137        rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
 138        rm_packet->bitfields2.atc = 0;
 139        rm_packet->bitfields2.tc_wb_action_ena = 1;
 140
 141        addr.quad_part = mem_obj->gpu_addr;
 142
 143        rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
 144        rm_packet->address_hi = addr.u.high_part;
 145
 146        rm_packet->bitfields3.data_sel =
 147                                data_sel___release_mem__send_64_bit_data;
 148
 149        rm_packet->bitfields3.int_sel =
 150                        int_sel___release_mem__send_data_after_write_confirm;
 151
 152        rm_packet->bitfields3.dst_sel =
 153                        dst_sel___release_mem__memory_controller;
 154
 155        rm_packet->data_lo = QUEUESTATE__ACTIVE;
 156
 157        kq->ops.submit_packet(kq);
 158
 159        /* Wait till CP writes sync code: */
 160        status = amdkfd_fence_wait_timeout(
 161                        (unsigned int *) rm_state,
 162                        QUEUESTATE__ACTIVE, 1500);
 163
 164        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 165
 166        return status;
 167}
 168
 169static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
 170{
 171        BUG_ON(!dbgdev);
 172
 173        /*
 174         * no action is needed in this case,
 175         * just make sure diq will not be used
 176         */
 177
 178        dbgdev->kq = NULL;
 179
 180        return 0;
 181}
 182
 183static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
 184{
 185        struct queue_properties properties;
 186        unsigned int qid;
 187        struct kernel_queue *kq = NULL;
 188        int status;
 189
 190        BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
 191
 192        status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
 193                                &properties, 0, KFD_QUEUE_TYPE_DIQ,
 194                                &qid);
 195
 196        if (status) {
 197                pr_err("amdkfd: Failed to create DIQ\n");
 198                return status;
 199        }
 200
 201        pr_debug("DIQ Created with queue id: %d\n", qid);
 202
 203        kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
 204
 205        if (kq == NULL) {
 206                pr_err("amdkfd: Error getting DIQ\n");
 207                pqm_destroy_queue(dbgdev->pqm, qid);
 208                return -EFAULT;
 209        }
 210
 211        dbgdev->kq = kq;
 212
 213        return status;
 214}
 215
 216static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
 217{
 218        BUG_ON(!dbgdev || !dbgdev->dev);
 219
 220        /* disable watch address */
 221        dbgdev_address_watch_disable_nodiq(dbgdev->dev);
 222        return 0;
 223}
 224
 225static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
 226{
 227        /* todo - disable address watch */
 228        int status;
 229
 230        BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
 231
 232        status = pqm_destroy_queue(dbgdev->pqm,
 233                        dbgdev->kq->queue->properties.queue_id);
 234        dbgdev->kq = NULL;
 235
 236        return status;
 237}
 238
 239static void dbgdev_address_watch_set_registers(
 240                        const struct dbg_address_watch_info *adw_info,
 241                        union TCP_WATCH_ADDR_H_BITS *addrHi,
 242                        union TCP_WATCH_ADDR_L_BITS *addrLo,
 243                        union TCP_WATCH_CNTL_BITS *cntl,
 244                        unsigned int index, unsigned int vmid)
 245{
 246        union ULARGE_INTEGER addr;
 247
 248        BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
 249
 250        addr.quad_part = 0;
 251        addrHi->u32All = 0;
 252        addrLo->u32All = 0;
 253        cntl->u32All = 0;
 254
 255        if (adw_info->watch_mask != NULL)
 256                cntl->bitfields.mask =
 257                        (uint32_t) (adw_info->watch_mask[index] &
 258                                        ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
 259        else
 260                cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
 261
 262        addr.quad_part = (unsigned long long) adw_info->watch_address[index];
 263
 264        addrHi->bitfields.addr = addr.u.high_part &
 265                                        ADDRESS_WATCH_REG_ADDHIGH_MASK;
 266        addrLo->bitfields.addr =
 267                        (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
 268
 269        cntl->bitfields.mode = adw_info->watch_mode[index];
 270        cntl->bitfields.vmid = (uint32_t) vmid;
 271        /* for now assume it is an ATC address */
 272        cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
 273
 274        pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
 275        pr_debug("\t\t%20s %08x\n", "set reg add high :",
 276                        addrHi->bitfields.addr);
 277        pr_debug("\t\t%20s %08x\n", "set reg add low :",
 278                        addrLo->bitfields.addr);
 279}
 280
 281static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
 282                                        struct dbg_address_watch_info *adw_info)
 283{
 284        union TCP_WATCH_ADDR_H_BITS addrHi;
 285        union TCP_WATCH_ADDR_L_BITS addrLo;
 286        union TCP_WATCH_CNTL_BITS cntl;
 287        struct kfd_process_device *pdd;
 288        unsigned int i;
 289
 290        BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
 291
 292        /* taking the vmid for that process on the safe way using pdd */
 293        pdd = kfd_get_process_device_data(dbgdev->dev,
 294                                        adw_info->process);
 295        if (!pdd) {
 296                pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
 297                return -EFAULT;
 298        }
 299
 300        addrHi.u32All = 0;
 301        addrLo.u32All = 0;
 302        cntl.u32All = 0;
 303
 304        if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
 305                        (adw_info->num_watch_points == 0)) {
 306                pr_err("amdkfd: num_watch_points is invalid\n");
 307                return -EINVAL;
 308        }
 309
 310        if ((adw_info->watch_mode == NULL) ||
 311                (adw_info->watch_address == NULL)) {
 312                pr_err("amdkfd: adw_info fields are not valid\n");
 313                return -EINVAL;
 314        }
 315
 316        for (i = 0 ; i < adw_info->num_watch_points ; i++) {
 317                dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
 318                                                &cntl, i, pdd->qpd.vmid);
 319
 320                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 321                pr_debug("\t\t%20s %08x\n", "register index :", i);
 322                pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
 323                pr_debug("\t\t%20s %08x\n", "Address Low is :",
 324                                addrLo.bitfields.addr);
 325                pr_debug("\t\t%20s %08x\n", "Address high is :",
 326                                addrHi.bitfields.addr);
 327                pr_debug("\t\t%20s %08x\n", "Address high is :",
 328                                addrHi.bitfields.addr);
 329                pr_debug("\t\t%20s %08x\n", "Control Mask is :",
 330                                cntl.bitfields.mask);
 331                pr_debug("\t\t%20s %08x\n", "Control Mode is :",
 332                                cntl.bitfields.mode);
 333                pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
 334                                cntl.bitfields.vmid);
 335                pr_debug("\t\t%20s %08x\n", "Control atc  is :",
 336                                cntl.bitfields.atc);
 337                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 338
 339                pdd->dev->kfd2kgd->address_watch_execute(
 340                                                dbgdev->dev->kgd,
 341                                                i,
 342                                                cntl.u32All,
 343                                                addrHi.u32All,
 344                                                addrLo.u32All);
 345        }
 346
 347        return 0;
 348}
 349
 350static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 351                                        struct dbg_address_watch_info *adw_info)
 352{
 353        struct pm4__set_config_reg *packets_vec;
 354        union TCP_WATCH_ADDR_H_BITS addrHi;
 355        union TCP_WATCH_ADDR_L_BITS addrLo;
 356        union TCP_WATCH_CNTL_BITS cntl;
 357        struct kfd_mem_obj *mem_obj;
 358        unsigned int aw_reg_add_dword;
 359        uint32_t *packet_buff_uint;
 360        unsigned int i;
 361        int status;
 362        size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
 363        /* we do not control the vmid in DIQ mode, just a place holder */
 364        unsigned int vmid = 0;
 365
 366        BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
 367
 368        addrHi.u32All = 0;
 369        addrLo.u32All = 0;
 370        cntl.u32All = 0;
 371
 372        if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
 373                        (adw_info->num_watch_points == 0)) {
 374                pr_err("amdkfd: num_watch_points is invalid\n");
 375                return -EINVAL;
 376        }
 377
 378        if ((NULL == adw_info->watch_mode) ||
 379                        (NULL == adw_info->watch_address)) {
 380                pr_err("amdkfd: adw_info fields are not valid\n");
 381                return -EINVAL;
 382        }
 383
 384        status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
 385
 386        if (status != 0) {
 387                pr_err("amdkfd: Failed to allocate GART memory\n");
 388                return status;
 389        }
 390
 391        packet_buff_uint = mem_obj->cpu_ptr;
 392
 393        memset(packet_buff_uint, 0, ib_size);
 394
 395        packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
 396
 397        packets_vec[0].header.count = 1;
 398        packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
 399        packets_vec[0].header.type = PM4_TYPE_3;
 400        packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
 401        packets_vec[0].bitfields2.insert_vmid = 1;
 402        packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
 403        packets_vec[1].bitfields2.insert_vmid = 0;
 404        packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
 405        packets_vec[2].bitfields2.insert_vmid = 0;
 406        packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
 407        packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
 408        packets_vec[3].bitfields2.insert_vmid = 1;
 409
 410        for (i = 0; i < adw_info->num_watch_points; i++) {
 411                dbgdev_address_watch_set_registers(adw_info,
 412                                                &addrHi,
 413                                                &addrLo,
 414                                                &cntl,
 415                                                i,
 416                                                vmid);
 417
 418                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 419                pr_debug("\t\t%20s %08x\n", "register index :", i);
 420                pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
 421                pr_debug("\t\t%20s %p\n", "Add ptr is :",
 422                                adw_info->watch_address);
 423                pr_debug("\t\t%20s %08llx\n", "Add     is :",
 424                                adw_info->watch_address[i]);
 425                pr_debug("\t\t%20s %08x\n", "Address Low is :",
 426                                addrLo.bitfields.addr);
 427                pr_debug("\t\t%20s %08x\n", "Address high is :",
 428                                addrHi.bitfields.addr);
 429                pr_debug("\t\t%20s %08x\n", "Control Mask is :",
 430                                cntl.bitfields.mask);
 431                pr_debug("\t\t%20s %08x\n", "Control Mode is :",
 432                                cntl.bitfields.mode);
 433                pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
 434                                cntl.bitfields.vmid);
 435                pr_debug("\t\t%20s %08x\n", "Control atc  is :",
 436                                cntl.bitfields.atc);
 437                pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
 438
 439                aw_reg_add_dword =
 440                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 441                                        dbgdev->dev->kgd,
 442                                        i,
 443                                        ADDRESS_WATCH_REG_CNTL);
 444
 445                aw_reg_add_dword /= sizeof(uint32_t);
 446
 447                packets_vec[0].bitfields2.reg_offset =
 448                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 449
 450                packets_vec[0].reg_data[0] = cntl.u32All;
 451
 452                aw_reg_add_dword =
 453                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 454                                        dbgdev->dev->kgd,
 455                                        i,
 456                                        ADDRESS_WATCH_REG_ADDR_HI);
 457
 458                aw_reg_add_dword /= sizeof(uint32_t);
 459
 460                packets_vec[1].bitfields2.reg_offset =
 461                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 462                packets_vec[1].reg_data[0] = addrHi.u32All;
 463
 464                aw_reg_add_dword =
 465                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 466                                        dbgdev->dev->kgd,
 467                                        i,
 468                                        ADDRESS_WATCH_REG_ADDR_LO);
 469
 470                aw_reg_add_dword /= sizeof(uint32_t);
 471
 472                packets_vec[2].bitfields2.reg_offset =
 473                                aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 474                packets_vec[2].reg_data[0] = addrLo.u32All;
 475
 476                /* enable watch flag if address is not zero*/
 477                if (adw_info->watch_address[i] > 0)
 478                        cntl.bitfields.valid = 1;
 479                else
 480                        cntl.bitfields.valid = 0;
 481
 482                aw_reg_add_dword =
 483                                dbgdev->dev->kfd2kgd->address_watch_get_offset(
 484                                        dbgdev->dev->kgd,
 485                                        i,
 486                                        ADDRESS_WATCH_REG_CNTL);
 487
 488                aw_reg_add_dword /= sizeof(uint32_t);
 489
 490                packets_vec[3].bitfields2.reg_offset =
 491                                        aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 492                packets_vec[3].reg_data[0] = cntl.u32All;
 493
 494                status = dbgdev_diq_submit_ib(
 495                                        dbgdev,
 496                                        adw_info->process->pasid,
 497                                        mem_obj->gpu_addr,
 498                                        packet_buff_uint,
 499                                        ib_size);
 500
 501                if (status != 0) {
 502                        pr_err("amdkfd: Failed to submit IB to DIQ\n");
 503                        break;
 504                }
 505        }
 506
 507        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 508        return status;
 509}
 510
 511static int dbgdev_wave_control_set_registers(
 512                                struct dbg_wave_control_info *wac_info,
 513                                union SQ_CMD_BITS *in_reg_sq_cmd,
 514                                union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
 515{
 516        int status = 0;
 517        union SQ_CMD_BITS reg_sq_cmd;
 518        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 519        struct HsaDbgWaveMsgAMDGen2 *pMsg;
 520
 521        BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
 522
 523        reg_sq_cmd.u32All = 0;
 524        reg_gfx_index.u32All = 0;
 525        pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
 526
 527        switch (wac_info->mode) {
 528        /* Send command to single wave */
 529        case HSA_DBG_WAVEMODE_SINGLE:
 530                /*
 531                 * Limit access to the process waves only,
 532                 * by setting vmid check
 533                 */
 534                reg_sq_cmd.bits.check_vmid = 1;
 535                reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
 536                reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
 537                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
 538
 539                reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
 540                reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
 541                reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
 542
 543                break;
 544
 545        /* Send command to all waves with matching VMID */
 546        case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
 547
 548                reg_gfx_index.bits.sh_broadcast_writes = 1;
 549                reg_gfx_index.bits.se_broadcast_writes = 1;
 550                reg_gfx_index.bits.instance_broadcast_writes = 1;
 551
 552                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
 553
 554                break;
 555
 556        /* Send command to all CU waves with matching VMID */
 557        case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
 558
 559                reg_sq_cmd.bits.check_vmid = 1;
 560                reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
 561
 562                reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
 563                reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
 564                reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
 565
 566                break;
 567
 568        default:
 569                return -EINVAL;
 570        }
 571
 572        switch (wac_info->operand) {
 573        case HSA_DBG_WAVEOP_HALT:
 574                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
 575                break;
 576
 577        case HSA_DBG_WAVEOP_RESUME:
 578                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
 579                break;
 580
 581        case HSA_DBG_WAVEOP_KILL:
 582                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
 583                break;
 584
 585        case HSA_DBG_WAVEOP_DEBUG:
 586                reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
 587                break;
 588
 589        case HSA_DBG_WAVEOP_TRAP:
 590                if (wac_info->trapId < MAX_TRAPID) {
 591                        reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
 592                        reg_sq_cmd.bits.trap_id = wac_info->trapId;
 593                } else {
 594                        status = -EINVAL;
 595                }
 596                break;
 597
 598        default:
 599                status = -EINVAL;
 600                break;
 601        }
 602
 603        if (status == 0) {
 604                *in_reg_sq_cmd = reg_sq_cmd;
 605                *in_reg_gfx_index = reg_gfx_index;
 606        }
 607
 608        return status;
 609}
 610
 611static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 612                                        struct dbg_wave_control_info *wac_info)
 613{
 614
 615        int status;
 616        union SQ_CMD_BITS reg_sq_cmd;
 617        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 618        struct kfd_mem_obj *mem_obj;
 619        uint32_t *packet_buff_uint;
 620        struct pm4__set_config_reg *packets_vec;
 621        size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
 622
 623        BUG_ON(!dbgdev || !wac_info);
 624
 625        reg_sq_cmd.u32All = 0;
 626
 627        status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
 628                                                        &reg_gfx_index);
 629        if (status) {
 630                pr_err("amdkfd: Failed to set wave control registers\n");
 631                return status;
 632        }
 633
 634        /* we do not control the VMID in DIQ,so reset it to a known value */
 635        reg_sq_cmd.bits.vm_id = 0;
 636
 637        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 638
 639        pr_debug("\t\t mode      is: %u\n", wac_info->mode);
 640        pr_debug("\t\t operand   is: %u\n", wac_info->operand);
 641        pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
 642        pr_debug("\t\t msg value is: %u\n",
 643                        wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 644        pr_debug("\t\t vmid      is: N/A\n");
 645
 646        pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
 647        pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
 648        pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
 649        pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
 650        pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
 651        pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
 652        pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
 653
 654        pr_debug("\t\t ibw       is : %u\n",
 655                        reg_gfx_index.bitfields.instance_broadcast_writes);
 656        pr_debug("\t\t ii        is : %u\n",
 657                        reg_gfx_index.bitfields.instance_index);
 658        pr_debug("\t\t sebw      is : %u\n",
 659                        reg_gfx_index.bitfields.se_broadcast_writes);
 660        pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
 661        pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
 662        pr_debug("\t\t sbw       is : %u\n",
 663                        reg_gfx_index.bitfields.sh_broadcast_writes);
 664
 665        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 666
 667        status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
 668
 669        if (status != 0) {
 670                pr_err("amdkfd: Failed to allocate GART memory\n");
 671                return status;
 672        }
 673
 674        packet_buff_uint = mem_obj->cpu_ptr;
 675
 676        memset(packet_buff_uint, 0, ib_size);
 677
 678        packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
 679        packets_vec[0].header.count = 1;
 680        packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
 681        packets_vec[0].header.type = PM4_TYPE_3;
 682        packets_vec[0].bitfields2.reg_offset =
 683                        GRBM_GFX_INDEX / (sizeof(uint32_t)) -
 684                                USERCONFIG_REG_BASE;
 685
 686        packets_vec[0].bitfields2.insert_vmid = 0;
 687        packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
 688
 689        packets_vec[1].header.count = 1;
 690        packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
 691        packets_vec[1].header.type = PM4_TYPE_3;
 692        packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
 693                                                AMD_CONFIG_REG_BASE;
 694
 695        packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
 696        packets_vec[1].bitfields2.insert_vmid = 1;
 697        packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
 698
 699        /* Restore the GRBM_GFX_INDEX register */
 700
 701        reg_gfx_index.u32All = 0;
 702        reg_gfx_index.bits.sh_broadcast_writes = 1;
 703        reg_gfx_index.bits.instance_broadcast_writes = 1;
 704        reg_gfx_index.bits.se_broadcast_writes = 1;
 705
 706
 707        packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
 708        packets_vec[2].bitfields2.reg_offset =
 709                                GRBM_GFX_INDEX / (sizeof(uint32_t)) -
 710                                        USERCONFIG_REG_BASE;
 711
 712        packets_vec[2].bitfields2.insert_vmid = 0;
 713        packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
 714
 715        status = dbgdev_diq_submit_ib(
 716                        dbgdev,
 717                        wac_info->process->pasid,
 718                        mem_obj->gpu_addr,
 719                        packet_buff_uint,
 720                        ib_size);
 721
 722        if (status != 0)
 723                pr_err("amdkfd: Failed to submit IB to DIQ\n");
 724
 725        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 726
 727        return status;
 728}
 729
 730static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
 731                                        struct dbg_wave_control_info *wac_info)
 732{
 733        int status;
 734        union SQ_CMD_BITS reg_sq_cmd;
 735        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 736        struct kfd_process_device *pdd;
 737
 738        BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
 739
 740        reg_sq_cmd.u32All = 0;
 741
 742        /* taking the VMID for that process on the safe way using PDD */
 743        pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
 744
 745        if (!pdd) {
 746                pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
 747                return -EFAULT;
 748        }
 749        status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
 750                                                        &reg_gfx_index);
 751        if (status) {
 752                pr_err("amdkfd: Failed to set wave control registers\n");
 753                return status;
 754        }
 755
 756        /* for non DIQ we need to patch the VMID: */
 757
 758        reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
 759
 760        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 761
 762        pr_debug("\t\t mode      is: %u\n", wac_info->mode);
 763        pr_debug("\t\t operand   is: %u\n", wac_info->operand);
 764        pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
 765        pr_debug("\t\t msg value is: %u\n",
 766                        wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 767        pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
 768
 769        pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
 770        pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
 771        pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
 772        pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
 773        pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
 774        pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
 775        pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
 776
 777        pr_debug("\t\t ibw       is : %u\n",
 778                        reg_gfx_index.bitfields.instance_broadcast_writes);
 779        pr_debug("\t\t ii        is : %u\n",
 780                        reg_gfx_index.bitfields.instance_index);
 781        pr_debug("\t\t sebw      is : %u\n",
 782                        reg_gfx_index.bitfields.se_broadcast_writes);
 783        pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
 784        pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
 785        pr_debug("\t\t sbw       is : %u\n",
 786                        reg_gfx_index.bitfields.sh_broadcast_writes);
 787
 788        pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 789
 790        return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
 791                                                        reg_gfx_index.u32All,
 792                                                        reg_sq_cmd.u32All);
 793}
 794
 795int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 796{
 797        int status = 0;
 798        unsigned int vmid;
 799        union SQ_CMD_BITS reg_sq_cmd;
 800        union GRBM_GFX_INDEX_BITS reg_gfx_index;
 801        struct kfd_process_device *pdd;
 802        struct dbg_wave_control_info wac_info;
 803        int temp;
 804        int first_vmid_to_scan = 8;
 805        int last_vmid_to_scan = 15;
 806
 807        first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
 808        temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
 809        last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
 810
 811        reg_sq_cmd.u32All = 0;
 812        status = 0;
 813
 814        wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
 815        wac_info.operand = HSA_DBG_WAVEOP_KILL;
 816
 817        pr_debug("Killing all process wavefronts\n");
 818
 819        /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
 820         * ATC_VMID15_PASID_MAPPING
 821         * to check which VMID the current process is mapped to. */
 822
 823        for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
 824                if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
 825                                (dev->kgd, vmid)) {
 826                        if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
 827                                        (dev->kgd, vmid) == p->pasid) {
 828                                pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
 829                                                vmid, p->pasid);
 830                                break;
 831                        }
 832                }
 833        }
 834
 835        if (vmid > last_vmid_to_scan) {
 836                pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
 837                return -EFAULT;
 838        }
 839
 840        /* taking the VMID for that process on the safe way using PDD */
 841        pdd = kfd_get_process_device_data(dev, p);
 842        if (!pdd)
 843                return -EFAULT;
 844
 845        status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
 846                        &reg_gfx_index);
 847        if (status != 0)
 848                return -EINVAL;
 849
 850        /* for non DIQ we need to patch the VMID: */
 851        reg_sq_cmd.bits.vm_id = vmid;
 852
 853        dev->kfd2kgd->wave_control_execute(dev->kgd,
 854                                        reg_gfx_index.u32All,
 855                                        reg_sq_cmd.u32All);
 856
 857        return 0;
 858}
 859
 860void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
 861                        enum DBGDEV_TYPE type)
 862{
 863        BUG_ON(!pdbgdev || !pdev);
 864
 865        pdbgdev->dev = pdev;
 866        pdbgdev->kq = NULL;
 867        pdbgdev->type = type;
 868        pdbgdev->pqm = NULL;
 869
 870        switch (type) {
 871        case DBGDEV_TYPE_NODIQ:
 872                pdbgdev->dbgdev_register = dbgdev_register_nodiq;
 873                pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
 874                pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
 875                pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
 876                break;
 877        case DBGDEV_TYPE_DIQ:
 878        default:
 879                pdbgdev->dbgdev_register = dbgdev_register_diq;
 880                pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
 881                pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
 882                pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
 883                break;
 884        }
 885
 886}
 887