linux/drivers/gpu/drm/amd/amdkfd/kfd_device.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"

#define MQD_SIZE_ALIGNED 768

/*
 * kfd_locked is used to lock the KFD driver during suspend or reset.
 * Once locked, the driver stops any further GPU execution, and
 * process creation (open) returns -EAGAIN.
 */
static atomic_t kfd_locked = ATOMIC_INIT(0);

#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
        .asic_family = CHIP_KAVERI,
        .max_pasid_bits = 16,
        /* max num of queues for KV. TODO: should be a dynamic value */
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = false,
        .needs_iommu_device = true,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info carrizo_device_info = {
        .asic_family = CHIP_CARRIZO,
        .max_pasid_bits = 16,
        /* max num of queues for CZ. TODO: should be a dynamic value */
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = true,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info raven_device_info = {
        .asic_family = CHIP_RAVEN,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = true,
        .needs_pci_atomics = true,
        .num_sdma_engines = 1,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};
#endif

static const struct kfd_device_info hawaii_device_info = {
        .asic_family = CHIP_HAWAII,
        .max_pasid_bits = 16,
        /* max num of queues for Hawaii. TODO: should be a dynamic value */
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = false,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info tonga_device_info = {
        .asic_family = CHIP_TONGA,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = false,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_device_info = {
        .asic_family = CHIP_FIJI,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_vf_device_info = {
        .asic_family = CHIP_FIJI,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_device_info = {
        .asic_family = CHIP_POLARIS10,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_vf_device_info = {
        .asic_family = CHIP_POLARIS10,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris11_device_info = {
        .asic_family = CHIP_POLARIS11,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris12_device_info = {
        .asic_family = CHIP_POLARIS12,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vegam_device_info = {
        .asic_family = CHIP_VEGAM,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_device_info = {
        .asic_family = CHIP_VEGA10,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_vf_device_info = {
        .asic_family = CHIP_VEGA10,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega12_device_info = {
        .asic_family = CHIP_VEGA12,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega20_device_info = {
        .asic_family = CHIP_VEGA20,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi10_device_info = {
        .asic_family = CHIP_NAVI10,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

struct kfd_deviceid {
        unsigned short did;
        const struct kfd_device_info *device_info;
};

static const struct kfd_deviceid supported_devices[] = {
#ifdef KFD_SUPPORT_IOMMU_V2
        { 0x1304, &kaveri_device_info },        /* Kaveri */
        { 0x1305, &kaveri_device_info },        /* Kaveri */
        { 0x1306, &kaveri_device_info },        /* Kaveri */
        { 0x1307, &kaveri_device_info },        /* Kaveri */
        { 0x1309, &kaveri_device_info },        /* Kaveri */
        { 0x130A, &kaveri_device_info },        /* Kaveri */
        { 0x130B, &kaveri_device_info },        /* Kaveri */
        { 0x130C, &kaveri_device_info },        /* Kaveri */
        { 0x130D, &kaveri_device_info },        /* Kaveri */
        { 0x130E, &kaveri_device_info },        /* Kaveri */
        { 0x130F, &kaveri_device_info },        /* Kaveri */
        { 0x1310, &kaveri_device_info },        /* Kaveri */
        { 0x1311, &kaveri_device_info },        /* Kaveri */
        { 0x1312, &kaveri_device_info },        /* Kaveri */
        { 0x1313, &kaveri_device_info },        /* Kaveri */
        { 0x1315, &kaveri_device_info },        /* Kaveri */
        { 0x1316, &kaveri_device_info },        /* Kaveri */
        { 0x1317, &kaveri_device_info },        /* Kaveri */
        { 0x1318, &kaveri_device_info },        /* Kaveri */
        { 0x131B, &kaveri_device_info },        /* Kaveri */
        { 0x131C, &kaveri_device_info },        /* Kaveri */
        { 0x131D, &kaveri_device_info },        /* Kaveri */
        { 0x9870, &carrizo_device_info },       /* Carrizo */
        { 0x9874, &carrizo_device_info },       /* Carrizo */
        { 0x9875, &carrizo_device_info },       /* Carrizo */
        { 0x9876, &carrizo_device_info },       /* Carrizo */
        { 0x9877, &carrizo_device_info },       /* Carrizo */
        { 0x15DD, &raven_device_info },         /* Raven */
        { 0x15D8, &raven_device_info },         /* Raven */
#endif
        { 0x67A0, &hawaii_device_info },        /* Hawaii */
        { 0x67A1, &hawaii_device_info },        /* Hawaii */
        { 0x67A2, &hawaii_device_info },        /* Hawaii */
        { 0x67A8, &hawaii_device_info },        /* Hawaii */
        { 0x67A9, &hawaii_device_info },        /* Hawaii */
        { 0x67AA, &hawaii_device_info },        /* Hawaii */
        { 0x67B0, &hawaii_device_info },        /* Hawaii */
        { 0x67B1, &hawaii_device_info },        /* Hawaii */
        { 0x67B8, &hawaii_device_info },        /* Hawaii */
        { 0x67B9, &hawaii_device_info },        /* Hawaii */
        { 0x67BA, &hawaii_device_info },        /* Hawaii */
        { 0x67BE, &hawaii_device_info },        /* Hawaii */
        { 0x6920, &tonga_device_info },         /* Tonga */
        { 0x6921, &tonga_device_info },         /* Tonga */
        { 0x6928, &tonga_device_info },         /* Tonga */
        { 0x6929, &tonga_device_info },         /* Tonga */
        { 0x692B, &tonga_device_info },         /* Tonga */
        { 0x6938, &tonga_device_info },         /* Tonga */
        { 0x6939, &tonga_device_info },         /* Tonga */
        { 0x7300, &fiji_device_info },          /* Fiji */
        { 0x730F, &fiji_vf_device_info },       /* Fiji vf */
        { 0x67C0, &polaris10_device_info },     /* Polaris10 */
        { 0x67C1, &polaris10_device_info },     /* Polaris10 */
        { 0x67C2, &polaris10_device_info },     /* Polaris10 */
        { 0x67C4, &polaris10_device_info },     /* Polaris10 */
        { 0x67C7, &polaris10_device_info },     /* Polaris10 */
        { 0x67C8, &polaris10_device_info },     /* Polaris10 */
        { 0x67C9, &polaris10_device_info },     /* Polaris10 */
        { 0x67CA, &polaris10_device_info },     /* Polaris10 */
        { 0x67CC, &polaris10_device_info },     /* Polaris10 */
        { 0x67CF, &polaris10_device_info },     /* Polaris10 */
        { 0x67D0, &polaris10_vf_device_info },  /* Polaris10 vf */
        { 0x67DF, &polaris10_device_info },     /* Polaris10 */
        { 0x6FDF, &polaris10_device_info },     /* Polaris10 */
        { 0x67E0, &polaris11_device_info },     /* Polaris11 */
        { 0x67E1, &polaris11_device_info },     /* Polaris11 */
        { 0x67E3, &polaris11_device_info },     /* Polaris11 */
        { 0x67E7, &polaris11_device_info },     /* Polaris11 */
        { 0x67E8, &polaris11_device_info },     /* Polaris11 */
        { 0x67E9, &polaris11_device_info },     /* Polaris11 */
        { 0x67EB, &polaris11_device_info },     /* Polaris11 */
        { 0x67EF, &polaris11_device_info },     /* Polaris11 */
        { 0x67FF, &polaris11_device_info },     /* Polaris11 */
        { 0x6980, &polaris12_device_info },     /* Polaris12 */
        { 0x6981, &polaris12_device_info },     /* Polaris12 */
        { 0x6985, &polaris12_device_info },     /* Polaris12 */
        { 0x6986, &polaris12_device_info },     /* Polaris12 */
        { 0x6987, &polaris12_device_info },     /* Polaris12 */
        { 0x6995, &polaris12_device_info },     /* Polaris12 */
        { 0x6997, &polaris12_device_info },     /* Polaris12 */
        { 0x699F, &polaris12_device_info },     /* Polaris12 */
        { 0x694C, &vegam_device_info },         /* VegaM */
        { 0x694E, &vegam_device_info },         /* VegaM */
        { 0x694F, &vegam_device_info },         /* VegaM */
        { 0x6860, &vega10_device_info },        /* Vega10 */
        { 0x6861, &vega10_device_info },        /* Vega10 */
        { 0x6862, &vega10_device_info },        /* Vega10 */
        { 0x6863, &vega10_device_info },        /* Vega10 */
        { 0x6864, &vega10_device_info },        /* Vega10 */
        { 0x6867, &vega10_device_info },        /* Vega10 */
        { 0x6868, &vega10_device_info },        /* Vega10 */
        { 0x6869, &vega10_device_info },        /* Vega10 */
        { 0x686A, &vega10_device_info },        /* Vega10 */
        { 0x686B, &vega10_device_info },        /* Vega10 */
        { 0x686C, &vega10_vf_device_info },     /* Vega10 vf */
        { 0x686D, &vega10_device_info },        /* Vega10 */
        { 0x686E, &vega10_device_info },        /* Vega10 */
        { 0x686F, &vega10_device_info },        /* Vega10 */
        { 0x687F, &vega10_device_info },        /* Vega10 */
        { 0x69A0, &vega12_device_info },        /* Vega12 */
        { 0x69A1, &vega12_device_info },        /* Vega12 */
        { 0x69A2, &vega12_device_info },        /* Vega12 */
        { 0x69A3, &vega12_device_info },        /* Vega12 */
        { 0x69AF, &vega12_device_info },        /* Vega12 */
        { 0x66a0, &vega20_device_info },        /* Vega20 */
        { 0x66a1, &vega20_device_info },        /* Vega20 */
        { 0x66a2, &vega20_device_info },        /* Vega20 */
        { 0x66a3, &vega20_device_info },        /* Vega20 */
        { 0x66a4, &vega20_device_info },        /* Vega20 */
        { 0x66a7, &vega20_device_info },        /* Vega20 */
        { 0x66af, &vega20_device_info },        /* Vega20 */
        /* Navi10 */
        { 0x7310, &navi10_device_info },        /* Navi10 */
        { 0x7312, &navi10_device_info },        /* Navi10 */
        { 0x7318, &navi10_device_info },        /* Navi10 */
        { 0x731a, &navi10_device_info },        /* Navi10 */
        { 0x731f, &navi10_device_info },        /* Navi10 */
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
                                unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

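/*
 * Return the kfd_device_info for the given PCI device ID by scanning
 * supported_devices, or NULL (with a warning) if the ID is not listed.
 */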
static const struct kfd_device_info *lookup_device_info(unsigned short did)
{
        size_t i;

        for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
                if (supported_devices[i].did == did) {
                        WARN_ON(!supported_devices[i].device_info);
                        return supported_devices[i].device_info;
                }
        }

        dev_warn(kfd_device, "DID %04x is missing in supported_devices\n",
                 did);

        return NULL;
}

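/*
 * First contact from KGD (amdgpu) for a GPU: allocate the kfd_dev for a
 * recognized device ID. Returns NULL, which disables KFD on this GPU, if
 * the device is unknown or requires PCIe atomics that the platform does
 * not provide.
 */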
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
        struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
        struct kfd_dev *kfd;
        const struct kfd_device_info *device_info =
                                        lookup_device_info(pdev->device);

        if (!device_info) {
                dev_err(kfd_device, "kgd2kfd_probe failed\n");
                return NULL;
        }

        kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
        if (!kfd)
                return NULL;

        /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
         * 32 and 64-bit requests are possible and must be
         * supported.
         */
        kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
        if (device_info->needs_pci_atomics &&
            !kfd->pci_atomic_requested) {
                dev_info(kfd_device,
                         "skipped device %x:%x, PCI rejects atomics\n",
                         pdev->vendor, pdev->device);
                kfree(kfd);
                return NULL;
        }

        kfd->kgd = kgd;
        kfd->device_info = device_info;
        kfd->pdev = pdev;
        kfd->init_complete = false;
        kfd->kfd2kgd = f2g;
        atomic_set(&kfd->compute_profile, 0);

        mutex_init(&kfd->doorbell_mutex);
        memset(&kfd->doorbell_available_index, 0,
                sizeof(kfd->doorbell_available_index));

        atomic_set(&kfd->sram_ecc_flag, 0);

        return kfd;
}

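/*
 * Pick the CWSR (compute wave save/restore) trap handler that matches the
 * GPU generation: gfx8 before Vega10, gfx9 before Navi10, gfx10 otherwise.
 * The BUILD_BUG_ONs enforce that each handler fits in a single page.
 */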
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
        if (cwsr_enable && kfd->device_info->supports_cwsr) {
                if (kfd->device_info->asic_family < CHIP_VEGA10) {
                        BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_gfx8_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
                } else if (kfd->device_info->asic_family < CHIP_NAVI10) {
                        BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_gfx9_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
                } else {
                        BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_gfx10_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
                }

                kfd->cwsr_enabled = true;
        }
}

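/*
 * Second-stage initialization, called once KGD has prepared the shared
 * resources. Sizes and allocates the GTT region that backs MQDs, runlist
 * packets and kernel queues, then brings up doorbells, interrupts, the
 * device queue manager and topology. On failure, everything initialized
 * so far is torn down in reverse order through the error labels below.
 */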
bool kgd2kfd_device_init(struct kfd_dev *kfd,
                         const struct kgd2kfd_shared_resources *gpu_resources)
{
        unsigned int size;

        kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
                        KGD_ENGINE_MEC1);
        kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
                        KGD_ENGINE_SDMA1);
        kfd->shared_resources = *gpu_resources;

        kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
        kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
        kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
                        - kfd->vm_info.first_vmid_kfd + 1;

        /* Verify module parameters regarding mapped process number */
        if ((hws_max_conc_proc < 0)
                        || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
                dev_err(kfd_device,
                        "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
                        hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
                        kfd->vm_info.vmid_num_kfd);
                kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
        } else
                kfd->max_proc_per_quantum = hws_max_conc_proc;

        /* Allocate global GWS that is shared by all KFD processes */
        if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
                        amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
                dev_err(kfd_device, "Could not allocate %d gws\n",
                        amdgpu_amdkfd_get_num_gws(kfd->kgd));
                goto out;
        }
        /* calculate max size of mqds needed for queues */
        size = max_num_of_queues_per_device *
                        kfd->device_info->mqd_size_aligned;

        /*
         * calculate max size of runlist packet.
         * There can be only 2 packets at once
         */
        size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
                max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
                + sizeof(struct pm4_mes_runlist)) * 2;

        /* Add size of HIQ & DIQ */
        size += KFD_KERNEL_QUEUE_SIZE * 2;

        /* add another 512KB for all other allocations on gart (HPD, fences) */
        size += 512 * 1024;

        if (amdgpu_amdkfd_alloc_gtt_mem(
                        kfd->kgd, size, &kfd->gtt_mem,
                        &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
                        false)) {
                dev_err(kfd_device, "Could not allocate %d bytes\n", size);
                goto alloc_gtt_mem_failure;
        }

        dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

        /* Initialize GTT sa with 512 byte chunk size */
        if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
                dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
                goto kfd_gtt_sa_init_error;
        }

        if (kfd_doorbell_init(kfd)) {
                dev_err(kfd_device,
                        "Error initializing doorbell aperture\n");
                goto kfd_doorbell_error;
        }

        if (kfd->kfd2kgd->get_hive_id)
                kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);

        if (kfd_interrupt_init(kfd)) {
                dev_err(kfd_device, "Error initializing interrupts\n");
                goto kfd_interrupt_error;
        }

        kfd->dqm = device_queue_manager_init(kfd);
        if (!kfd->dqm) {
                dev_err(kfd_device, "Error initializing queue manager\n");
                goto device_queue_manager_error;
        }

        if (kfd_iommu_device_init(kfd)) {
                dev_err(kfd_device, "Error initializing iommuv2\n");
                goto device_iommu_error;
        }

        kfd_cwsr_init(kfd);

        if (kfd_resume(kfd))
                goto kfd_resume_error;

        kfd->dbgmgr = NULL;

        if (kfd_topology_add_device(kfd)) {
                dev_err(kfd_device, "Error adding device to topology\n");
                goto kfd_topology_add_device_error;
        }

        kfd->init_complete = true;
        dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
                 kfd->pdev->device);

        pr_debug("Starting kfd with the following scheduling policy %d\n",
                kfd->dqm->sched_policy);

        goto out;

kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
        device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
        kfd_interrupt_exit(kfd);
kfd_interrupt_error:
        kfd_doorbell_fini(kfd);
kfd_doorbell_error:
        kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
        amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
alloc_gtt_mem_failure:
        if (hws_gws_support)
                amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
        dev_err(kfd_device,
                "device %x:%x NOT added due to errors\n",
                kfd->pdev->vendor, kfd->pdev->device);
out:
        return kfd->init_complete;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
        if (kfd->init_complete) {
                kgd2kfd_suspend(kfd);
                device_queue_manager_uninit(kfd->dqm);
                kfd_interrupt_exit(kfd);
                kfd_topology_remove_device(kfd);
                kfd_doorbell_fini(kfd);
                kfd_gtt_sa_fini(kfd);
                amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
                if (hws_gws_support)
                        amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
        }

        kfree(kfd);
}

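/*
 * Called by KGD before a GPU reset: suspend KFD and keep dqm->lock held
 * so no further work reaches the GPU until kgd2kfd_post_reset() unlocks
 * it after the reset completes.
 */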
int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
        if (!kfd->init_complete)
                return 0;
        kgd2kfd_suspend(kfd);

        /* hold dqm->lock to prevent further execution */
        dqm_lock(kfd->dqm);

        kfd_signal_reset_event(kfd);
        return 0;
}

/*
 * FIXME: KFD won't be able to resume existing processes for now.
 * We will keep all existing processes in an evicted state and
 * wait for them to be terminated.
 */

int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
        int ret;

        if (!kfd->init_complete)
                return 0;

        dqm_unlock(kfd->dqm);

        ret = kfd_resume(kfd);
        if (ret)
                return ret;
        atomic_dec(&kfd_locked);

        atomic_set(&kfd->sram_ecc_flag, 0);

        return 0;
}

bool kfd_is_locked(void)
{
        return (atomic_read(&kfd_locked) > 0);
}

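/*
 * Suspend this KFD device. The first device suspended bumps kfd_locked
 * from 0 to 1 and evicts all KFD processes; kgd2kfd_resume() (or
 * kgd2kfd_post_reset()) drops the count again.
 */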
void kgd2kfd_suspend(struct kfd_dev *kfd)
{
        if (!kfd->init_complete)
                return;

        /* For first KFD device suspend all the KFD processes */
        if (atomic_inc_return(&kfd_locked) == 1)
                kfd_suspend_all_processes();

        kfd->dqm->ops.stop(kfd->dqm);

        kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd)
{
        int ret, count;

        if (!kfd->init_complete)
                return 0;

        ret = kfd_resume(kfd);
        if (ret)
                return ret;

        count = atomic_dec_return(&kfd_locked);
        WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
        if (count == 0)
                ret = kfd_resume_all_processes();

        return ret;
}

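/*
 * Common resume path: restore the IOMMU binding first, then restart the
 * device queue manager. If the DQM fails to start, the IOMMU is suspended
 * again so the device is left in a consistent state.
 */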
static int kfd_resume(struct kfd_dev *kfd)
{
        int err = 0;

        err = kfd_iommu_resume(kfd);
        if (err) {
                dev_err(kfd_device,
                        "Failed to resume IOMMU for device %x:%x\n",
                        kfd->pdev->vendor, kfd->pdev->device);
                return err;
        }

        err = kfd->dqm->ops.start(kfd->dqm);
        if (err) {
                dev_err(kfd_device,
                        "Error starting queue manager for device %x:%x\n",
                        kfd->pdev->vendor, kfd->pdev->device);
                goto dqm_start_error;
        }

        return err;

dqm_start_error:
        kfd_iommu_suspend(kfd);
        return err;
}

/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
        uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
        bool is_patched = false;
        unsigned long flags;

        if (!kfd->init_complete)
                return;

        if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
                dev_err_once(kfd_device, "Ring entry too small\n");
                return;
        }

        spin_lock_irqsave(&kfd->interrupt_lock, flags);

        if (kfd->interrupts_active
            && interrupt_is_wanted(kfd, ih_ring_entry,
                                   patched_ihre, &is_patched)
            && enqueue_ih_ring_entry(kfd,
                                     is_patched ? patched_ihre : ih_ring_entry))
                queue_work(kfd->ih_wq, &kfd->interrupt_work);

        spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}

int kgd2kfd_quiesce_mm(struct mm_struct *mm)
{
        struct kfd_process *p;
        int r;

        /* Because we are called from arbitrary context (workqueue) as opposed
         * to process context, kfd_process could attempt to exit while we are
         * running so the lookup function increments the process ref count.
         */
        p = kfd_lookup_process_by_mm(mm);
        if (!p)
                return -ESRCH;

        r = kfd_process_evict_queues(p);

        kfd_unref_process(p);
        return r;
}

int kgd2kfd_resume_mm(struct mm_struct *mm)
{
        struct kfd_process *p;
        int r;

        /* Because we are called from arbitrary context (workqueue) as opposed
         * to process context, kfd_process could attempt to exit while we are
         * running so the lookup function increments the process ref count.
         */
        p = kfd_lookup_process_by_mm(mm);
        if (!p)
                return -ESRCH;

        r = kfd_process_restore_queues(p);

        kfd_unref_process(p);
        return r;
}

/**
 * kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
 *   prepare for safe eviction of KFD BOs that belong to the specified
 *   process.
 *
 * @mm: mm_struct that identifies the specified KFD process
 * @fence: eviction fence attached to KFD process BOs
 *
 */
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
                                               struct dma_fence *fence)
{
        struct kfd_process *p;
        unsigned long active_time;
        unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);

        if (!fence)
                return -EINVAL;

        if (dma_fence_is_signaled(fence))
                return 0;

        p = kfd_lookup_process_by_mm(mm);
        if (!p)
                return -ENODEV;

        if (fence->seqno == p->last_eviction_seqno)
                goto out;

        p->last_eviction_seqno = fence->seqno;

        /* Avoid KFD process starvation. Wait for at least
         * PROCESS_ACTIVE_TIME_MS before evicting the process again
         */
        active_time = get_jiffies_64() - p->last_restore_timestamp;
        if (delay_jiffies > active_time)
                delay_jiffies -= active_time;
        else
                delay_jiffies = 0;

        /* During process initialization eviction_work.dwork is initialized
         * to kfd_evict_bo_worker
         */
        schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
        kfd_unref_process(p);
        return 0;
}

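/*
 * GTT sub-allocator: the GTT buffer allocated in kgd2kfd_device_init() is
 * carved into fixed-size chunks, with one bit per chunk in gtt_sa_bitmap
 * marking the chunks in use. Requests are served from runs of contiguous
 * free chunks.
 */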
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
                                unsigned int chunk_size)
{
        unsigned int num_of_longs;

        if (WARN_ON(buf_size < chunk_size))
                return -EINVAL;
        if (WARN_ON(buf_size == 0))
                return -EINVAL;
        if (WARN_ON(chunk_size == 0))
                return -EINVAL;

        kfd->gtt_sa_chunk_size = chunk_size;
        kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

        num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
                BITS_PER_LONG;

        kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);

        if (!kfd->gtt_sa_bitmap)
                return -ENOMEM;

        pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
                        kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

        mutex_init(&kfd->gtt_sa_lock);

        return 0;
}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
        mutex_destroy(&kfd->gtt_sa_lock);
        kfree(kfd->gtt_sa_bitmap);
}

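/*
 * Translate a chunk index (bit number) into the corresponding GPU VA or
 * CPU pointer inside the GTT buffer; chunk 0 starts at start_addr.
 */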
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
                                                unsigned int bit_num,
                                                unsigned int chunk_size)
{
        return start_addr + bit_num * chunk_size;
}

static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
                                                unsigned int bit_num,
                                                unsigned int chunk_size)
{
        return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
}

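/*
 * Allocate size bytes from the GTT sub-allocator: find a run of
 * contiguous free chunks in the bitmap large enough for the request,
 * mark it allocated, and fill *mem_obj with the chunk range and its
 * GPU/CPU addresses. Returns -ENOMEM if no such run exists.
 */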
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
                        struct kfd_mem_obj **mem_obj)
{
        unsigned int found, start_search, cur_size;

        if (size == 0)
                return -EINVAL;

        if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
                return -ENOMEM;

        *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
        if (!(*mem_obj))
                return -ENOMEM;

        pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

        start_search = 0;

        mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
        /* Find the first chunk that is free */
        found = find_next_zero_bit(kfd->gtt_sa_bitmap,
                                        kfd->gtt_sa_num_of_chunks,
                                        start_search);

        pr_debug("Found = %d\n", found);

        /* If there wasn't any free chunk, bail out */
        if (found == kfd->gtt_sa_num_of_chunks)
                goto kfd_gtt_no_free_chunk;

        /* Update fields of mem_obj */
        (*mem_obj)->range_start = found;
        (*mem_obj)->range_end = found;
        (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
                                        kfd->gtt_start_gpu_addr,
                                        found,
                                        kfd->gtt_sa_chunk_size);
        (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
                                        kfd->gtt_start_cpu_ptr,
                                        found,
                                        kfd->gtt_sa_chunk_size);

        pr_debug("gpu_addr = %p, cpu_addr = %p\n",
                        (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

        /* If we need only one chunk, mark it as allocated and get out */
        if (size <= kfd->gtt_sa_chunk_size) {
                pr_debug("Single bit\n");
                set_bit(found, kfd->gtt_sa_bitmap);
                goto kfd_gtt_out;
        }

        /* Otherwise, try to see if we have enough contiguous chunks */
        cur_size = size - kfd->gtt_sa_chunk_size;
        do {
                (*mem_obj)->range_end =
                        find_next_zero_bit(kfd->gtt_sa_bitmap,
                                        kfd->gtt_sa_num_of_chunks, ++found);
                /*
                 * If the next free chunk is not contiguous, restart the
                 * search from that chunk (the last free one we found,
                 * which wasn't contiguous with the previous ones).
                 */
                if ((*mem_obj)->range_end != found) {
                        start_search = found;
                        goto kfd_gtt_restart_search;
                }

                /*
                 * If we reached the end of the buffer, bail out with error
                 */
                if (found == kfd->gtt_sa_num_of_chunks)
                        goto kfd_gtt_no_free_chunk;

                /* Check if we don't need another chunk */
                if (cur_size <= kfd->gtt_sa_chunk_size)
                        cur_size = 0;
                else
                        cur_size -= kfd->gtt_sa_chunk_size;

        } while (cur_size > 0);

        pr_debug("range_start = %d, range_end = %d\n",
                (*mem_obj)->range_start, (*mem_obj)->range_end);

        /* Mark the chunks as allocated */
        for (found = (*mem_obj)->range_start;
                found <= (*mem_obj)->range_end;
                found++)
                set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
        mutex_unlock(&kfd->gtt_sa_lock);
        return 0;

kfd_gtt_no_free_chunk:
        pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
        mutex_unlock(&kfd->gtt_sa_lock);
        kfree(*mem_obj);
        return -ENOMEM;
}

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
        unsigned int bit;

        /* Act like kfree when trying to free a NULL object */
        if (!mem_obj)
                return 0;

        pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
                        mem_obj, mem_obj->range_start, mem_obj->range_end);

        mutex_lock(&kfd->gtt_sa_lock);

        /* Mark the chunks as free */
        for (bit = mem_obj->range_start;
                bit <= mem_obj->range_end;
                bit++)
                clear_bit(bit, kfd->gtt_sa_bitmap);

        mutex_unlock(&kfd->gtt_sa_lock);

        kfree(mem_obj);
        return 0;
}

void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
        if (kfd)
                atomic_inc(&kfd->sram_ecc_flag);
}

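/*
 * compute_profile tracks the number of active compute queues: the first
 * one switches the GPU out of the idle power profile and the last one
 * to go away switches it back.
 */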
void kfd_inc_compute_active(struct kfd_dev *kfd)
{
        if (atomic_inc_return(&kfd->compute_profile) == 1)
                amdgpu_amdkfd_set_compute_idle(kfd->kgd, false);
}

void kfd_dec_compute_active(struct kfd_dev *kfd)
{
        int count = atomic_dec_return(&kfd->compute_profile);

        if (count == 0)
                amdgpu_amdkfd_set_compute_idle(kfd->kgd, true);
        WARN_ONCE(count < 0, "Compute profile ref. count error");
}

#if defined(CONFIG_DEBUG_FS)

/* This function will send a packet to the HIQ to hang the HWS,
 * which will trigger a GPU reset and bring the HWS back to a normal state
 */
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
{
        int r = 0;

        if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
                pr_err("HWS is not enabled\n");
                return -EINVAL;
        }

        r = pm_debugfs_hang_hws(&dev->dqm->packets);
        if (!r)
                r = dqm_debugfs_execute_queues(dev->dqm);

        return r;
}

#endif
