linux/drivers/gpu/drm/amd/amdkfd/kfd_device.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"
#include "amdgpu.h"

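/* Size in bytes reserved per queue for its memory queue descriptor (MQD) */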
#define MQD_SIZE_ALIGNED 768

/*
 * kfd_locked is used to lock the KFD driver during suspend or reset.
 * Once locked, the driver will stop any further GPU execution.
 * Process creation (open) will return -EAGAIN.
 */
static atomic_t kfd_locked = ATOMIC_INIT(0);

#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
#endif
extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;

#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
        .asic_family = CHIP_KAVERI,
        .asic_name = "kaveri",
        .gfx_target_version = 70000,
        .max_pasid_bits = 16,
        /* max num of queues for KV. TODO: should be a dynamic value */
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = false,
        .needs_iommu_device = true,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info carrizo_device_info = {
        .asic_family = CHIP_CARRIZO,
        .asic_name = "carrizo",
        .gfx_target_version = 80001,
        .max_pasid_bits = 16,
        /* max num of queues for CZ. TODO: should be a dynamic value */
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = true,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info raven_device_info = {
        .asic_family = CHIP_RAVEN,
        .asic_name = "raven",
        .gfx_target_version = 90002,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = true,
        .needs_pci_atomics = true,
        .num_sdma_engines = 1,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};
#endif

#ifdef CONFIG_DRM_AMDGPU_CIK
static const struct kfd_device_info hawaii_device_info = {
        .asic_family = CHIP_HAWAII,
        .asic_name = "hawaii",
        .gfx_target_version = 70001,
        .max_pasid_bits = 16,
        /* max num of queues for Hawaii. TODO: should be a dynamic value */
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = false,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};
#endif

static const struct kfd_device_info tonga_device_info = {
        .asic_family = CHIP_TONGA,
        .asic_name = "tonga",
        .gfx_target_version = 80002,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = false,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_device_info = {
        .asic_family = CHIP_FIJI,
        .asic_name = "fiji",
        .gfx_target_version = 80003,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_vf_device_info = {
        .asic_family = CHIP_FIJI,
        .asic_name = "fiji",
        .gfx_target_version = 80003,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_device_info = {
        .asic_family = CHIP_POLARIS10,
        .asic_name = "polaris10",
        .gfx_target_version = 80003,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_vf_device_info = {
        .asic_family = CHIP_POLARIS10,
        .asic_name = "polaris10",
        .gfx_target_version = 80003,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris11_device_info = {
        .asic_family = CHIP_POLARIS11,
        .asic_name = "polaris11",
        .gfx_target_version = 80003,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris12_device_info = {
        .asic_family = CHIP_POLARIS12,
        .asic_name = "polaris12",
        .gfx_target_version = 80003,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vegam_device_info = {
        .asic_family = CHIP_VEGAM,
        .asic_name = "vegam",
        .gfx_target_version = 80003,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 4,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_device_info = {
        .asic_family = CHIP_VEGA10,
        .asic_name = "vega10",
        .gfx_target_version = 90000,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_vf_device_info = {
        .asic_family = CHIP_VEGA10,
        .asic_name = "vega10",
        .gfx_target_version = 90000,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega12_device_info = {
        .asic_family = CHIP_VEGA12,
        .asic_name = "vega12",
        .gfx_target_version = 90004,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega20_device_info = {
        .asic_family = CHIP_VEGA20,
        .asic_name = "vega20",
        .gfx_target_version = 90006,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info arcturus_device_info = {
        .asic_family = CHIP_ARCTURUS,
        .asic_name = "arcturus",
        .gfx_target_version = 90008,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 6,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info aldebaran_device_info = {
        .asic_family = CHIP_ALDEBARAN,
        .asic_name = "aldebaran",
        .gfx_target_version = 90010,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 3,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info renoir_device_info = {
        .asic_family = CHIP_RENOIR,
        .asic_name = "renoir",
        .gfx_target_version = 90012,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .supports_cwsr = true,
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 1,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info navi10_device_info = {
        .asic_family = CHIP_NAVI10,
        .asic_name = "navi10",
        .gfx_target_version = 100100,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 145,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi12_device_info = {
        .asic_family = CHIP_NAVI12,
        .asic_name = "navi12",
        .gfx_target_version = 100101,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 145,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi14_device_info = {
        .asic_family = CHIP_NAVI14,
        .asic_name = "navi14",
        .gfx_target_version = 100102,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 145,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info sienna_cichlid_device_info = {
        .asic_family = CHIP_SIENNA_CICHLID,
        .asic_name = "sienna_cichlid",
        .gfx_target_version = 100300,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 92,
        .num_sdma_engines = 4,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navy_flounder_device_info = {
        .asic_family = CHIP_NAVY_FLOUNDER,
        .asic_name = "navy_flounder",
        .gfx_target_version = 100301,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 92,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info vangogh_device_info = {
        .asic_family = CHIP_VANGOGH,
        .asic_name = "vangogh",
        .gfx_target_version = 100303,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 92,
        .num_sdma_engines = 1,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info dimgrey_cavefish_device_info = {
        .asic_family = CHIP_DIMGREY_CAVEFISH,
        .asic_name = "dimgrey_cavefish",
        .gfx_target_version = 100302,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 92,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info beige_goby_device_info = {
        .asic_family = CHIP_BEIGE_GOBY,
        .asic_name = "beige_goby",
        .gfx_target_version = 100304,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 92,
        .num_sdma_engines = 1,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info yellow_carp_device_info = {
        .asic_family = CHIP_YELLOW_CARP,
        .asic_name = "yellow_carp",
        .gfx_target_version = 100305,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .no_atomic_fw_version = 92,
        .num_sdma_engines = 1,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info cyan_skillfish_device_info = {
        .asic_family = CHIP_CYAN_SKILLFISH,
        .asic_name = "cyan_skillfish",
        .gfx_target_version = 100103,
        .max_pasid_bits = 16,
        .max_no_of_hqd  = 24,
        .doorbell_size  = 8,
        .ih_ring_entry_size = 8 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_v9,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED,
        .needs_iommu_device = false,
        .supports_cwsr = true,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
                                unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

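/*
 * Look up the kfd_device_info entry and kfd2kgd function table that match
 * this GPU, keyed on asic_type for older parts and on the GC IP version
 * for newer ones, then allocate and minimally initialize the kfd_dev.
 * Returns NULL if the device (or its virtual function) is not supported.
 */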
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
{
        struct kfd_dev *kfd;
        const struct kfd_device_info *device_info;
        const struct kfd2kgd_calls *f2g;
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
        struct pci_dev *pdev = adev->pdev;

        switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
        case CHIP_KAVERI:
                if (vf)
                        device_info = NULL;
                else
                        device_info = &kaveri_device_info;
                f2g = &gfx_v7_kfd2kgd;
                break;
#endif
        case CHIP_CARRIZO:
                if (vf)
                        device_info = NULL;
                else
                        device_info = &carrizo_device_info;
                f2g = &gfx_v8_kfd2kgd;
                break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
        case CHIP_HAWAII:
                if (vf)
                        device_info = NULL;
                else
                        device_info = &hawaii_device_info;
                f2g = &gfx_v7_kfd2kgd;
                break;
#endif
        case CHIP_TONGA:
                if (vf)
                        device_info = NULL;
                else
                        device_info = &tonga_device_info;
                f2g = &gfx_v8_kfd2kgd;
                break;
        case CHIP_FIJI:
                if (vf)
                        device_info = &fiji_vf_device_info;
                else
                        device_info = &fiji_device_info;
                f2g = &gfx_v8_kfd2kgd;
                break;
        case CHIP_POLARIS10:
                if (vf)
                        device_info = &polaris10_vf_device_info;
                else
                        device_info = &polaris10_device_info;
                f2g = &gfx_v8_kfd2kgd;
                break;
        case CHIP_POLARIS11:
                if (vf)
                        device_info = NULL;
                else
                        device_info = &polaris11_device_info;
                f2g = &gfx_v8_kfd2kgd;
                break;
        case CHIP_POLARIS12:
                if (vf)
                        device_info = NULL;
                else
                        device_info = &polaris12_device_info;
                f2g = &gfx_v8_kfd2kgd;
                break;
        case CHIP_VEGAM:
                if (vf)
                        device_info = NULL;
                else
                        device_info = &vegam_device_info;
                f2g = &gfx_v8_kfd2kgd;
                break;
        default:
                switch (adev->ip_versions[GC_HWIP][0]) {
                case IP_VERSION(9, 0, 1):
                        if (vf)
                                device_info = &vega10_vf_device_info;
                        else
                                device_info = &vega10_device_info;
                        f2g = &gfx_v9_kfd2kgd;
                        break;
#ifdef KFD_SUPPORT_IOMMU_V2
                case IP_VERSION(9, 1, 0):
                case IP_VERSION(9, 2, 2):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &raven_device_info;
                        f2g = &gfx_v9_kfd2kgd;
                        break;
#endif
                case IP_VERSION(9, 2, 1):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &vega12_device_info;
                        f2g = &gfx_v9_kfd2kgd;
                        break;
                case IP_VERSION(9, 3, 0):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &renoir_device_info;
                        f2g = &gfx_v9_kfd2kgd;
                        break;
                case IP_VERSION(9, 4, 0):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &vega20_device_info;
                        f2g = &gfx_v9_kfd2kgd;
                        break;
                case IP_VERSION(9, 4, 1):
                        device_info = &arcturus_device_info;
                        f2g = &arcturus_kfd2kgd;
                        break;
                case IP_VERSION(9, 4, 2):
                        device_info = &aldebaran_device_info;
                        f2g = &aldebaran_kfd2kgd;
                        break;
                case IP_VERSION(10, 1, 10):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &navi10_device_info;
                        f2g = &gfx_v10_kfd2kgd;
                        break;
                case IP_VERSION(10, 1, 2):
                        device_info = &navi12_device_info;
                        f2g = &gfx_v10_kfd2kgd;
                        break;
                case IP_VERSION(10, 1, 1):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &navi14_device_info;
                        f2g = &gfx_v10_kfd2kgd;
                        break;
                case IP_VERSION(10, 1, 3):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &cyan_skillfish_device_info;
                        f2g = &gfx_v10_kfd2kgd;
                        break;
                case IP_VERSION(10, 3, 0):
                        device_info = &sienna_cichlid_device_info;
                        f2g = &gfx_v10_3_kfd2kgd;
                        break;
                case IP_VERSION(10, 3, 2):
                        device_info = &navy_flounder_device_info;
                        f2g = &gfx_v10_3_kfd2kgd;
                        break;
                case IP_VERSION(10, 3, 1):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &vangogh_device_info;
                        f2g = &gfx_v10_3_kfd2kgd;
                        break;
                case IP_VERSION(10, 3, 4):
                        device_info = &dimgrey_cavefish_device_info;
                        f2g = &gfx_v10_3_kfd2kgd;
                        break;
                case IP_VERSION(10, 3, 5):
                        device_info = &beige_goby_device_info;
                        f2g = &gfx_v10_3_kfd2kgd;
                        break;
                case IP_VERSION(10, 3, 3):
                        if (vf)
                                device_info = NULL;
                        else
                                device_info = &yellow_carp_device_info;
                        f2g = &gfx_v10_3_kfd2kgd;
                        break;
                default:
                        return NULL;
                }
                break;
        }

        if (!device_info || !f2g) {
                dev_err(kfd_device, "%s %s not supported in kfd\n",
                        amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
                return NULL;
        }

        kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
        if (!kfd)
                return NULL;

        kfd->kgd = kgd;
        kfd->device_info = device_info;
        kfd->pdev = pdev;
        kfd->init_complete = false;
        kfd->kfd2kgd = f2g;
        atomic_set(&kfd->compute_profile, 0);

        mutex_init(&kfd->doorbell_mutex);
        memset(&kfd->doorbell_available_index, 0,
                sizeof(kfd->doorbell_available_index));

        atomic_set(&kfd->sram_ecc_flag, 0);

        ida_init(&kfd->doorbell_ida);

        return kfd;
}

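/*
 * Pick the compute wave save/restore (CWSR) trap handler binary that
 * matches this ASIC family. Each binary must fit in one page, which the
 * BUILD_BUG_ON checks below enforce at compile time.
 */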
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
        if (cwsr_enable && kfd->device_info->supports_cwsr) {
                if (kfd->device_info->asic_family < CHIP_VEGA10) {
                        BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_gfx8_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
                } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) {
                        BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_arcturus_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
                } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) {
                        BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
                } else if (kfd->device_info->asic_family < CHIP_NAVI10) {
                        BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_gfx9_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
                } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
                        BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_nv1x_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
                } else {
                        BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
                        kfd->cwsr_isa = cwsr_trap_gfx10_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
                }

                kfd->cwsr_enabled = true;
        }
}

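/*
 * Allocate the global wave sync (GWS) resource that is shared by all KFD
 * processes, but only when the HW scheduler is in use and either
 * hws_gws_support is forced on or the MEC2 firmware is recent enough for
 * this ASIC family.
 */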
static int kfd_gws_init(struct kfd_dev *kfd)
{
        int ret = 0;

        if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
                return 0;

        if (hws_gws_support
                || (kfd->device_info->asic_family == CHIP_VEGA10
                        && kfd->mec2_fw_version >= 0x81b3)
                || (kfd->device_info->asic_family >= CHIP_VEGA12
                        && kfd->device_info->asic_family <= CHIP_RAVEN
                        && kfd->mec2_fw_version >= 0x1b3)
                || (kfd->device_info->asic_family == CHIP_ARCTURUS
                        && kfd->mec2_fw_version >= 0x30)
                || (kfd->device_info->asic_family == CHIP_ALDEBARAN
                        && kfd->mec2_fw_version >= 0x28))
                ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
                                amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);

        return ret;
}

static void kfd_smi_init(struct kfd_dev *dev)
{
        INIT_LIST_HEAD(&dev->smi_clients);
        spin_lock_init(&dev->smi_lock);
}

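/*
 * Second-stage init: size one GTT buffer for MQDs, runlist packets and
 * the kernel queues (HIQ & DIQ), then bring up the sub-allocator,
 * doorbells, interrupts, the device queue manager, GWS, IOMMU, CWSR and
 * topology. On any failure the completed steps are unwound in reverse
 * and kfd->init_complete stays false.
 */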
bool kgd2kfd_device_init(struct kfd_dev *kfd,
                         struct drm_device *ddev,
                         const struct kgd2kfd_shared_resources *gpu_resources)
{
        unsigned int size, map_process_packet_size;

        kfd->ddev = ddev;
        kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
                        KGD_ENGINE_MEC1);
        kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
                        KGD_ENGINE_MEC2);
        kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
                        KGD_ENGINE_SDMA1);
        kfd->shared_resources = *gpu_resources;

        kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
        kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
        kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
                        - kfd->vm_info.first_vmid_kfd + 1;

        /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
         * 32 and 64-bit requests are possible and must be
         * supported.
         */
        kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd);
        if (!kfd->pci_atomic_requested &&
            kfd->device_info->needs_pci_atomics &&
            (!kfd->device_info->no_atomic_fw_version ||
             kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
                dev_info(kfd_device,
                         "skipped device %x:%x, PCI rejects atomics %d<%d\n",
                         kfd->pdev->vendor, kfd->pdev->device,
                         kfd->mec_fw_version,
                         kfd->device_info->no_atomic_fw_version);
                return false;
        }

        /* Verify module parameters regarding the number of mapped processes */
        if ((hws_max_conc_proc < 0)
                        || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
                dev_err(kfd_device,
                        "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
                        hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
                        kfd->vm_info.vmid_num_kfd);
                kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
        } else {
                kfd->max_proc_per_quantum = hws_max_conc_proc;
        }

        /* calculate max size of mqds needed for queues */
        size = max_num_of_queues_per_device *
                        kfd->device_info->mqd_size_aligned;

        /*
         * calculate max size of runlist packet.
         * There can be only 2 packets at once
         */
        map_process_packet_size =
                        kfd->device_info->asic_family == CHIP_ALDEBARAN ?
                                sizeof(struct pm4_mes_map_process_aldebaran) :
                                        sizeof(struct pm4_mes_map_process);
        size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
                max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
                + sizeof(struct pm4_mes_runlist)) * 2;

        /* Add size of HIQ & DIQ */
        size += KFD_KERNEL_QUEUE_SIZE * 2;

        /* add another 512KB for all other allocations on gart (HPD, fences) */
        size += 512 * 1024;

        if (amdgpu_amdkfd_alloc_gtt_mem(
                        kfd->kgd, size, &kfd->gtt_mem,
                        &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
                        false)) {
                dev_err(kfd_device, "Could not allocate %d bytes\n", size);
                goto alloc_gtt_mem_failure;
        }

        dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

        /* Initialize GTT sa with 512 byte chunk size */
        if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
                dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
                goto kfd_gtt_sa_init_error;
        }

        if (kfd_doorbell_init(kfd)) {
                dev_err(kfd_device,
                        "Error initializing doorbell aperture\n");
                goto kfd_doorbell_error;
        }

        kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);

        kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd);

        if (kfd_interrupt_init(kfd)) {
                dev_err(kfd_device, "Error initializing interrupts\n");
                goto kfd_interrupt_error;
        }

        kfd->dqm = device_queue_manager_init(kfd);
        if (!kfd->dqm) {
                dev_err(kfd_device, "Error initializing queue manager\n");
                goto device_queue_manager_error;
        }

        /* If supported on this device, allocate global GWS that is shared
         * by all KFD processes
         */
        if (kfd_gws_init(kfd)) {
                dev_err(kfd_device, "Could not allocate %d gws\n",
                        amdgpu_amdkfd_get_num_gws(kfd->kgd));
                goto gws_error;
        }

        /* If CRAT is broken, won't set iommu enabled */
        kfd_double_confirm_iommu_support(kfd);

        if (kfd_iommu_device_init(kfd)) {
                kfd->use_iommu_v2 = false;
                dev_err(kfd_device, "Error initializing iommuv2\n");
                goto device_iommu_error;
        }

        kfd_cwsr_init(kfd);

        svm_migrate_init((struct amdgpu_device *)kfd->kgd);

        if (kgd2kfd_resume_iommu(kfd))
                goto device_iommu_error;

        if (kfd_resume(kfd))
                goto kfd_resume_error;

        kfd->dbgmgr = NULL;

        if (kfd_topology_add_device(kfd)) {
                dev_err(kfd_device, "Error adding device to topology\n");
                goto kfd_topology_add_device_error;
        }

        kfd_smi_init(kfd);

        kfd->init_complete = true;
        dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
                 kfd->pdev->device);

        pr_debug("Starting kfd with the following scheduling policy %d\n",
                kfd->dqm->sched_policy);

        goto out;

kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
gws_error:
        device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
        kfd_interrupt_exit(kfd);
kfd_interrupt_error:
        kfd_doorbell_fini(kfd);
kfd_doorbell_error:
        kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
        amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
alloc_gtt_mem_failure:
        if (kfd->gws)
                amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
        dev_err(kfd_device,
                "device %x:%x NOT added due to errors\n",
                kfd->pdev->vendor, kfd->pdev->device);
out:
        return kfd->init_complete;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
        if (kfd->init_complete) {
                device_queue_manager_uninit(kfd->dqm);
                kfd_interrupt_exit(kfd);
                kfd_topology_remove_device(kfd);
                kfd_doorbell_fini(kfd);
                ida_destroy(&kfd->doorbell_ida);
                kfd_gtt_sa_fini(kfd);
                amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
                if (kfd->gws)
                        amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
        }

        kfree(kfd);
}

int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
        if (!kfd->init_complete)
                return 0;

        kfd_smi_event_update_gpu_reset(kfd, false);

        kfd->dqm->ops.pre_reset(kfd->dqm);

        kgd2kfd_suspend(kfd, false);

        kfd_signal_reset_event(kfd);
        return 0;
}

/*
 * FIXME: KFD won't be able to resume existing processes for now.
 * We will keep all existing processes in an evicted state and
 * wait for them to be terminated.
 */

int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
        int ret;

        if (!kfd->init_complete)
                return 0;

        ret = kfd_resume(kfd);
        if (ret)
                return ret;
        atomic_dec(&kfd_locked);

        atomic_set(&kfd->sram_ecc_flag, 0);

        kfd_smi_event_update_gpu_reset(kfd, true);

        return 0;
}

bool kfd_is_locked(void)
{
        return (atomic_read(&kfd_locked) > 0);
}

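/*
 * kfd_locked counts devices suspended for non-runtime PM: the first
 * suspend evicts all KFD processes and the last resume restores them.
 * Runtime PM (run_pm) skips this global lock.
 */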
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
        if (!kfd->init_complete)
                return;

        /* for runtime suspend, skip locking kfd */
        if (!run_pm) {
                /* For first KFD device suspend all the KFD processes */
                if (atomic_inc_return(&kfd_locked) == 1)
                        kfd_suspend_all_processes();
        }

        kfd->dqm->ops.stop(kfd->dqm);
        kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
        int ret, count;

        if (!kfd->init_complete)
                return 0;

        ret = kfd_resume(kfd);
        if (ret)
                return ret;

        /* for runtime resume, skip unlocking kfd */
        if (!run_pm) {
                count = atomic_dec_return(&kfd_locked);
                WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
                if (count == 0)
                        ret = kfd_resume_all_processes();
        }

        return ret;
}

int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
        int err = 0;

        err = kfd_iommu_resume(kfd);
        if (err)
                dev_err(kfd_device,
                        "Failed to resume IOMMU for device %x:%x\n",
                        kfd->pdev->vendor, kfd->pdev->device);
        return err;
}

static int kfd_resume(struct kfd_dev *kfd)
{
        int err = 0;

        err = kfd->dqm->ops.start(kfd->dqm);
        if (err)
                dev_err(kfd_device,
                        "Error starting queue manager for device %x:%x\n",
                        kfd->pdev->vendor, kfd->pdev->device);

        return err;
}

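/*
 * Queue interrupt work on an online CPU within the current NUMA node,
 * starting the search from the CPU that took the interrupt, so that the
 * bottom half runs close to the IH ring data it will consume.
 */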
static inline void kfd_queue_work(struct workqueue_struct *wq,
                                  struct work_struct *work)
{
        int cpu, new_cpu;

        cpu = new_cpu = smp_processor_id();
        do {
                new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
                if (cpu_to_node(new_cpu) == numa_node_id())
                        break;
        } while (cpu != new_cpu);

        queue_work_on(new_cpu, wq, work);
}

/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
        uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
        bool is_patched = false;
        unsigned long flags;

        if (!kfd->init_complete)
                return;

        if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
                dev_err_once(kfd_device, "Ring entry too small\n");
                return;
        }

        spin_lock_irqsave(&kfd->interrupt_lock, flags);

        if (kfd->interrupts_active
            && interrupt_is_wanted(kfd, ih_ring_entry,
                                   patched_ihre, &is_patched)
            && enqueue_ih_ring_entry(kfd,
                                     is_patched ? patched_ihre : ih_ring_entry))
                kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);

        spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}

int kgd2kfd_quiesce_mm(struct mm_struct *mm)
{
        struct kfd_process *p;
        int r;

        /* Because we are called from arbitrary context (workqueue) as opposed
         * to process context, kfd_process could attempt to exit while we are
         * running so the lookup function increments the process ref count.
         */
        p = kfd_lookup_process_by_mm(mm);
        if (!p)
                return -ESRCH;

        WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
        r = kfd_process_evict_queues(p);

        kfd_unref_process(p);
        return r;
}

int kgd2kfd_resume_mm(struct mm_struct *mm)
{
        struct kfd_process *p;
        int r;

        /* Because we are called from arbitrary context (workqueue) as opposed
         * to process context, kfd_process could attempt to exit while we are
         * running so the lookup function increments the process ref count.
         */
        p = kfd_lookup_process_by_mm(mm);
        if (!p)
                return -ESRCH;

        r = kfd_process_restore_queues(p);

        kfd_unref_process(p);
        return r;
}

/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
 *   prepare for safe eviction of KFD BOs that belong to the specified
 *   process.
 *
 * @mm: mm_struct that identifies the specified KFD process
 * @fence: eviction fence attached to KFD process BOs
 *
 */
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
                                               struct dma_fence *fence)
{
        struct kfd_process *p;
        unsigned long active_time;
        unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);

        if (!fence)
                return -EINVAL;

        if (dma_fence_is_signaled(fence))
                return 0;

        p = kfd_lookup_process_by_mm(mm);
        if (!p)
                return -ENODEV;

        if (fence->seqno == p->last_eviction_seqno)
                goto out;

        p->last_eviction_seqno = fence->seqno;

        /* Avoid KFD process starvation. Wait for at least
         * PROCESS_ACTIVE_TIME_MS before evicting the process again
         */
        active_time = get_jiffies_64() - p->last_restore_timestamp;
        if (delay_jiffies > active_time)
                delay_jiffies -= active_time;
        else
                delay_jiffies = 0;

        /* During process initialization eviction_work.dwork is initialized
         * to kfd_evict_bo_worker
         */
        WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
             p->lead_thread->pid, delay_jiffies);
        schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
        kfd_unref_process(p);
        return 0;
}

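/*
 * GTT sub-allocator: the GTT buffer allocated in kgd2kfd_device_init() is
 * carved into fixed-size chunks, tracked with one bit per chunk in
 * gtt_sa_bitmap under gtt_sa_lock.
 */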
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
                                unsigned int chunk_size)
{
        unsigned int num_of_longs;

        if (WARN_ON(buf_size < chunk_size))
                return -EINVAL;
        if (WARN_ON(buf_size == 0))
                return -EINVAL;
        if (WARN_ON(chunk_size == 0))
                return -EINVAL;

        kfd->gtt_sa_chunk_size = chunk_size;
        kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

        num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
                BITS_PER_LONG;

        kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);

        if (!kfd->gtt_sa_bitmap)
                return -ENOMEM;

        pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
                        kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

        mutex_init(&kfd->gtt_sa_lock);

        return 0;
}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
        mutex_destroy(&kfd->gtt_sa_lock);
        kfree(kfd->gtt_sa_bitmap);
}

static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
                                                unsigned int bit_num,
                                                unsigned int chunk_size)
{
        return start_addr + bit_num * chunk_size;
}

static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
                                                unsigned int bit_num,
                                                unsigned int chunk_size)
{
        return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
}

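/*
 * First-fit search for a contiguous run of free chunks big enough to hold
 * size bytes. On a gap the search restarts after it; running off the end
 * of the bitmap fails with -ENOMEM.
 */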
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
                        struct kfd_mem_obj **mem_obj)
{
        unsigned int found, start_search, cur_size;

        if (size == 0)
                return -EINVAL;

        if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
                return -ENOMEM;

        *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
        if (!(*mem_obj))
                return -ENOMEM;

        pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

        start_search = 0;

        mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
        /* Find the first chunk that is free */
        found = find_next_zero_bit(kfd->gtt_sa_bitmap,
                                        kfd->gtt_sa_num_of_chunks,
                                        start_search);

        pr_debug("Found = %d\n", found);

        /* If there wasn't any free chunk, bail out */
        if (found == kfd->gtt_sa_num_of_chunks)
                goto kfd_gtt_no_free_chunk;

        /* Update fields of mem_obj */
        (*mem_obj)->range_start = found;
        (*mem_obj)->range_end = found;
        (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
                                        kfd->gtt_start_gpu_addr,
                                        found,
                                        kfd->gtt_sa_chunk_size);
        (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
                                        kfd->gtt_start_cpu_ptr,
                                        found,
                                        kfd->gtt_sa_chunk_size);

        pr_debug("gpu_addr = %p, cpu_addr = %p\n",
                        (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

        /* If we need only one chunk, mark it as allocated and get out */
        if (size <= kfd->gtt_sa_chunk_size) {
                pr_debug("Single bit\n");
                set_bit(found, kfd->gtt_sa_bitmap);
                goto kfd_gtt_out;
        }

        /* Otherwise, try to see if we have enough contiguous chunks */
        cur_size = size - kfd->gtt_sa_chunk_size;
        do {
                (*mem_obj)->range_end =
                        find_next_zero_bit(kfd->gtt_sa_bitmap,
                                        kfd->gtt_sa_num_of_chunks, ++found);
1451                /*
1452                 * If next free chunk is not contiguous than we need to
1453                 * restart our search from the last free chunk we found (which
1454                 * wasn't contiguous to the previous ones
1455                 */
                if ((*mem_obj)->range_end != found) {
                        start_search = found;
                        goto kfd_gtt_restart_search;
                }

                /*
                 * If we reached end of buffer, bail out with error
                 */
                if (found == kfd->gtt_sa_num_of_chunks)
                        goto kfd_gtt_no_free_chunk;

                /* Check if we don't need another chunk */
                if (cur_size <= kfd->gtt_sa_chunk_size)
                        cur_size = 0;
                else
                        cur_size -= kfd->gtt_sa_chunk_size;

        } while (cur_size > 0);

        pr_debug("range_start = %d, range_end = %d\n",
                (*mem_obj)->range_start, (*mem_obj)->range_end);

        /* Mark the chunks as allocated */
        for (found = (*mem_obj)->range_start;
                found <= (*mem_obj)->range_end;
                found++)
                set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
        mutex_unlock(&kfd->gtt_sa_lock);
        return 0;

kfd_gtt_no_free_chunk:
        pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
        mutex_unlock(&kfd->gtt_sa_lock);
        kfree(*mem_obj);
        return -ENOMEM;
}

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
        unsigned int bit;

        /* Act like kfree when trying to free a NULL object */
        if (!mem_obj)
                return 0;

        pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
                        mem_obj, mem_obj->range_start, mem_obj->range_end);

        mutex_lock(&kfd->gtt_sa_lock);

        /* Mark the chunks as free */
        for (bit = mem_obj->range_start;
                bit <= mem_obj->range_end;
                bit++)
                clear_bit(bit, kfd->gtt_sa_bitmap);

        mutex_unlock(&kfd->gtt_sa_lock);

        kfree(mem_obj);
        return 0;
}

void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
        if (kfd)
                atomic_inc(&kfd->sram_ecc_flag);
}

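/*
 * compute_profile counts active compute users: the first user switches
 * the GPU out of the compute-idle power profile and the last one going
 * away switches it back.
 */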
void kfd_inc_compute_active(struct kfd_dev *kfd)
{
        if (atomic_inc_return(&kfd->compute_profile) == 1)
                amdgpu_amdkfd_set_compute_idle(kfd->kgd, false);
}

void kfd_dec_compute_active(struct kfd_dev *kfd)
{
        int count = atomic_dec_return(&kfd->compute_profile);

        if (count == 0)
                amdgpu_amdkfd_set_compute_idle(kfd->kgd, true);
        WARN_ONCE(count < 0, "Compute profile ref. count error");
}

void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
        if (kfd && kfd->init_complete)
                kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}

#if defined(CONFIG_DEBUG_FS)

/* This function will send a packet to the HIQ to hang the HWS,
 * which will trigger a GPU reset and bring the HWS back to a normal state
 */
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
{
        if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
                pr_err("HWS is not enabled");
                return -EINVAL;
        }

        return dqm_debugfs_hang_hws(dev->dqm);
}

#endif