linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_amdkfd.h"
#include "amd_pcie.h"
#include "amd_shared.h"

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"

/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

static bool kfd_initialized;

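/* One-time module init: record the system memory size used (together with
 * per-GPU VRAM added at device probe) to estimate worst-case page-table
 * reservations, initialize the GPUVM memory limits, and bring up the shared
 * kgd2kfd interface.
 */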
int amdgpu_amdkfd_init(void)
{
	struct sysinfo si;
	int ret;

	si_meminfo(&si);
	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
	amdgpu_amdkfd_total_mem_size *= si.mem_unit;

	ret = kgd2kfd_init();
	amdgpu_amdkfd_gpuvm_init_mem_limits();
	kfd_initialized = !ret;

	return ret;
}

void amdgpu_amdkfd_fini(void)
{
	if (kfd_initialized) {
		kgd2kfd_exit();
		kfd_initialized = false;
	}
}

void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	bool vf = amdgpu_sriov_vf(adev);

	if (!kfd_initialized)
		return;

	adev->kfd.dev = kgd2kfd_probe(adev, vf);

	if (adev->kfd.dev)
		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
}

/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 *                                set up amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}

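/* Collect the resources amdgpu shares with amdkfd (compute VMID bitmap, MEC
 * queue bitmap, doorbell aperture layout, GPUVM aperture size) and hand them
 * to kgd2kfd_device_init() to finish bringing up the KFD device.
 */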
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	if (adev->kfd.dev) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not a compile-time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, BIF starts to statically use the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE, etc.
		 * In order to route a doorbell to the CP engine, the lower
		 * 12 bits of its address have to be outside the range
		 * set for SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
						adev_to_drm(adev), &gpu_resources);
	}
}

void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
{
	if (adev->kfd.dev) {
		kgd2kfd_device_exit(adev->kfd.dev);
		adev->kfd.dev = NULL;
	}
}

void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
		const void *ih_ring_entry)
{
	if (adev->kfd.dev)
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend(adev->kfd.dev, run_pm);
}

int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume_iommu(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume(adev->kfd.dev, run_pm);

	return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_pre_reset(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_post_reset(adev->kfd.dev);

	return r;
}

void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, NULL);
}

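/* Allocate a kernel-type GTT BO for amdkfd, pin it, bind it to GART and map
 * it into kernel space; on success return the BO handle, its GPU address and
 * the CPU pointer. Illustrative use (hypothetical caller, not taken from an
 * in-tree user):
 *
 *	void *mem_obj;
 *	uint64_t gpu_addr;
 *	void *cpu_ptr;
 *
 *	if (!amdgpu_amdkfd_alloc_gtt_mem(adev, PAGE_SIZE, &mem_obj,
 *					 &gpu_addr, &cpu_ptr, false)) {
 *		// fill the buffer through cpu_ptr, point HW at gpu_addr
 *		amdgpu_amdkfd_free_gtt_mem(adev, mem_obj);
 *	}
 */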
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}

void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&(bo));
}

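/* Allocate a buffer object in the GWS (global wave sync) domain for amdkfd.
 * The BO is created without CPU access and is released with
 * amdgpu_amdkfd_free_gws().
 */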
int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
				void **mem_obj)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_user *ubo;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create_user(adev, &bp, &ubo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate gws BO for amdkfd (%d)\n", r);
		return r;
	}

	bo = &ubo->bo;
	*mem_obj = bo;
	return 0;
}

void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&bo);
}

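/* Return the loaded firmware version for the requested engine, or 0 for an
 * unknown engine type.
 */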
uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
				      enum kgd_engine_type type)
{
	switch (type) {
	case KGD_ENGINE_PFP:
		return adev->gfx.pfp_fw_version;

	case KGD_ENGINE_ME:
		return adev->gfx.me_fw_version;

	case KGD_ENGINE_CE:
		return adev->gfx.ce_fw_version;

	case KGD_ENGINE_MEC1:
		return adev->gfx.mec_fw_version;

	case KGD_ENGINE_MEC2:
		return adev->gfx.mec2_fw_version;

	case KGD_ENGINE_RLC:
		return adev->gfx.rlc_fw_version;

	case KGD_ENGINE_SDMA1:
		return adev->sdma.instance[0].fw_version;

	case KGD_ENGINE_SDMA2:
		return adev->sdma.instance[1].fw_version;

	default:
		return 0;
	}

	return 0;
}

void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
				      struct kfd_local_mem_info *mem_info)
{
	memset(mem_info, 0, sizeof(*mem_info));

	mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
	mem_info->local_mem_size_private = adev->gmc.real_vram_size -
						adev->gmc.visible_vram_size;

	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	if (amdgpu_sriov_vf(adev))
		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
	else if (adev->pm.dpm_enabled) {
		if (amdgpu_emu_mode == 1)
			mem_info->mem_clk_max = 0;
		else
			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	} else
		mem_info->mem_clk_max = 100;
}

uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	if (adev->gfx.funcs->get_gpu_clock_counter)
		return adev->gfx.funcs->get_gpu_clock_counter(adev);
	return 0;
}

uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
{
	/* the sclk is in quanta of 10 kHz */
	if (amdgpu_sriov_vf(adev))
		return adev->clock.default_sclk / 100;
	else if (adev->pm.dpm_enabled)
		return amdgpu_dpm_get_sclk(adev, false) / 100;
	else
		return 100;
}

void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
{
	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));
	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
		return;

	cu_info->cu_active_number = acu_info.number;
	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
	       sizeof(acu_info.bitmap));
	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
	cu_info->simd_per_cu = acu_info.simd_per_cu;
	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
	cu_info->wave_front_size = acu_info.wave_front_size;
	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
	cu_info->lds_size = acu_info.lds_size;
}

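/* Look up a dma-buf by fd and, if it is an amdgpu VRAM or GTT BO owned by
 * this driver, report its owning device, size, metadata and KFD allocation
 * flags. Returns -EINVAL for buffers this function cannot handle.
 */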
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
				  struct amdgpu_device **dmabuf_adev,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags)
{
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dmabuf_adev)
		*dmabuf_adev = adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}

out_put:
	dma_buf_put(dma_buf);
	return r;
}

uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
					  struct amdgpu_device *src)
{
	struct amdgpu_device *peer_adev = src;
	struct amdgpu_device *adev = dst;
	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);

	if (ret < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, ret);
		ret = 0;
	}
	return (uint8_t)ret;
}

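/* Estimate the xGMI bandwidth in MB/s between two Aldebaran devices. With
 * xGMI DPM defeatured, each link is assumed to be x16 at 25 Gbps, so the
 * result is num_links * 16 * 25000 / 8; is_min assumes a single link.
 */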
int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
					    struct amdgpu_device *src,
					    bool is_min)
{
	struct amdgpu_device *adev = dst, *peer_adev;
	int num_links;

	if (adev->asic_type != CHIP_ALDEBARAN)
		return 0;

	if (src)
		peer_adev = src;

	/* num links returns 0 for indirect peers since indirect route is unknown. */
	num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
	if (num_links < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, num_links);
		num_links = 0;
	}

	/* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
	return (num_links * 16 * 25000)/BITS_PER_BYTE;
}

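/* Estimate PCIe bandwidth in MB/s from the supported link width and speed
 * masks: bandwidth = lanes * per-lane rate in Mbit/s / 8. is_min picks the
 * lowest supported width/speed, otherwise the highest. For example, a Gen4
 * x16 link yields 16 * 16000 / 8 = 32000 MB/s (raw signalling rate, ignoring
 * encoding overhead).
 */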
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
						fls(adev->pm.pcie_mlw_mask)) - 1;
	int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
					fls(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
	uint32_t num_lanes_mask = 1 << num_lanes_shift;
	uint32_t gen_speed_mask = 1 << gen_speed_shift;
	int num_lanes_factor = 0, gen_speed_mbits_factor = 0;

	switch (num_lanes_mask) {
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
		num_lanes_factor = 1;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
		num_lanes_factor = 2;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
		num_lanes_factor = 4;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
		num_lanes_factor = 8;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
		num_lanes_factor = 12;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
		num_lanes_factor = 16;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
		num_lanes_factor = 32;
		break;
	}

	switch (gen_speed_mask) {
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
		gen_speed_mbits_factor = 2500;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
		gen_speed_mbits_factor = 5000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
		gen_speed_mbits_factor = 8000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
		gen_speed_mbits_factor = 16000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
		gen_speed_mbits_factor = 32000;
		break;
	}

	return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
}

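/* Submit a single indirect buffer on behalf of amdkfd on the first compute
 * or SDMA ring of the requested engine, using the caller-supplied VMID, and
 * wait for its fence before returning.
 */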
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
				enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	ret = dma_fence_wait(f, false);

err_ib_sched:
	amdgpu_job_free(job);
err:
	return ret;
}

void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
	amdgpu_dpm_switch_power_profile(adev,
					PP_SMC_POWER_PROFILE_COMPUTE,
					!idle);
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd.dev)
		return vmid >= adev->vm_manager.first_kfd_vmid;

	return false;
}

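/* Flush the GPU TLB for a VMID. Vega-family (AMDGPU_FAMILY_AI) parts flush
 * every VM hub; other parts flush only GFXHUB 0.
 */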
int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
				     uint16_t vmid)
{
	if (adev->family == AMDGPU_FAMILY_AI) {
		int i;

		for (i = 0; i < adev->num_vmhubs; i++)
			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
	} else {
		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
	}

	return 0;
}

int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
				      uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
{
	bool all_hub = false;

	if (adev->family == AMDGPU_FAMILY_AI)
		all_hub = true;

	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
}

bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
{
	return adev->have_atomics_support;
}

void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
{
	struct ras_err_data err_data = {0, 0, 0, NULL};

	/* CPU MCA will handle page retirement if connected_to_cpu is 1 */
	if (!adev->gmc.xgmi.connected_to_cpu)
		amdgpu_umc_poison_handler(adev, &err_data, reset);
	else if (reset)
		amdgpu_amdkfd_gpu_reset(adev);
}