linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <linux/power_supply.h>
  29#include <linux/kthread.h>
  30#include <linux/module.h>
  31#include <linux/console.h>
  32#include <linux/slab.h>
  33
  34#include <drm/drm_atomic_helper.h>
  35#include <drm/drm_probe_helper.h>
  36#include <drm/amdgpu_drm.h>
  37#include <linux/vgaarb.h>
  38#include <linux/vga_switcheroo.h>
  39#include <linux/efi.h>
  40#include "amdgpu.h"
  41#include "amdgpu_trace.h"
  42#include "amdgpu_i2c.h"
  43#include "atom.h"
  44#include "amdgpu_atombios.h"
  45#include "amdgpu_atomfirmware.h"
  46#include "amd_pcie.h"
  47#ifdef CONFIG_DRM_AMDGPU_SI
  48#include "si.h"
  49#endif
  50#ifdef CONFIG_DRM_AMDGPU_CIK
  51#include "cik.h"
  52#endif
  53#include "vi.h"
  54#include "soc15.h"
  55#include "nv.h"
  56#include "bif/bif_4_1_d.h"
  57#include <linux/pci.h>
  58#include <linux/firmware.h>
  59#include "amdgpu_vf_error.h"
  60
  61#include "amdgpu_amdkfd.h"
  62#include "amdgpu_pm.h"
  63
  64#include "amdgpu_xgmi.h"
  65#include "amdgpu_ras.h"
  66#include "amdgpu_pmu.h"
  67
  68MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  69MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  70MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  71MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
  72MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
  73MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
  74MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
  75MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
  76MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
  77MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
  78
  79#define AMDGPU_RESUME_MS                2000
  80
  81static const char *amdgpu_asic_name[] = {
  82        "TAHITI",
  83        "PITCAIRN",
  84        "VERDE",
  85        "OLAND",
  86        "HAINAN",
  87        "BONAIRE",
  88        "KAVERI",
  89        "KABINI",
  90        "HAWAII",
  91        "MULLINS",
  92        "TOPAZ",
  93        "TONGA",
  94        "FIJI",
  95        "CARRIZO",
  96        "STONEY",
  97        "POLARIS10",
  98        "POLARIS11",
  99        "POLARIS12",
 100        "VEGAM",
 101        "VEGA10",
 102        "VEGA12",
 103        "VEGA20",
 104        "RAVEN",
 105        "ARCTURUS",
 106        "RENOIR",
 107        "NAVI10",
 108        "NAVI14",
 109        "NAVI12",
 110        "LAST",
 111};
 112
 113/**
 114 * DOC: pcie_replay_count
 115 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 120 */
 121
 122static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 123                struct device_attribute *attr, char *buf)
 124{
 125        struct drm_device *ddev = dev_get_drvdata(dev);
 126        struct amdgpu_device *adev = ddev->dev_private;
 127        uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
 128
 129        return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
 130}
 131
 132static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
 133                amdgpu_device_get_pcie_replay_count, NULL);
 134
 135static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 136
 137/**
 138 * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
 139 *
 140 * @dev: drm_device pointer
 141 *
 142 * Returns true if the device is a dGPU with HG/PX power control,
 143 * otherwise return false.
 144 */
 145bool amdgpu_device_is_px(struct drm_device *dev)
 146{
 147        struct amdgpu_device *adev = dev->dev_private;
 148
 149        if (adev->flags & AMD_IS_PX)
 150                return true;
 151        return false;
 152}
 153
 154/*
 155 * MMIO register access helper functions.
 156 */
 157/**
 158 * amdgpu_mm_rreg - read a memory mapped IO register
 159 *
 160 * @adev: amdgpu_device pointer
 161 * @reg: dword aligned register offset
 162 * @acc_flags: access flags which require special behavior
 163 *
 164 * Returns the 32 bit value from the offset specified.
 165 */
 166uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 167                        uint32_t acc_flags)
 168{
 169        uint32_t ret;
 170
 171        if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 172                return amdgpu_virt_kiq_rreg(adev, reg);
 173
 174        if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 175                ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 176        else {
 177                unsigned long flags;
 178
 179                spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 180                writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 181                ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 182                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 183        }
 184        trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
 185        return ret;
 186}
 187
 188/*
 189 * MMIO register read with bytes helper functions
 190 * @offset:bytes offset from MMIO start
 191 *
 192*/
 193
 194/**
 195 * amdgpu_mm_rreg8 - read a memory mapped IO register
 196 *
 197 * @adev: amdgpu_device pointer
 198 * @offset: byte aligned register offset
 199 *
 200 * Returns the 8 bit value from the offset specified.
 201 */
 202uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
 203        if (offset < adev->rmmio_size)
 204                return (readb(adev->rmmio + offset));
 205        BUG();
 206}
 207
 208/*
 209 * MMIO register write with bytes helper functions
 210 * @offset:bytes offset from MMIO start
 211 * @value: the value want to be written to the register
 212 *
 213*/
 214/**
 215 * amdgpu_mm_wreg8 - read a memory mapped IO register
 216 *
 217 * @adev: amdgpu_device pointer
 218 * @offset: byte aligned register offset
 219 * @value: 8 bit value to write
 220 *
 221 * Writes the value specified to the offset specified.
 222 */
 223void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
 224        if (offset < adev->rmmio_size)
 225                writeb(value, adev->rmmio + offset);
 226        else
 227                BUG();
 228}
 229
 230/**
 231 * amdgpu_mm_wreg - write to a memory mapped IO register
 232 *
 233 * @adev: amdgpu_device pointer
 234 * @reg: dword aligned register offset
 235 * @v: 32 bit value to write to the register
 236 * @acc_flags: access flags which require special behavior
 237 *
 238 * Writes the value specified to the offset specified.
 239 */
 240void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 241                    uint32_t acc_flags)
 242{
 243        trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
 244
 245        if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
 246                adev->last_mm_index = v;
 247        }
 248
 249        if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 250                return amdgpu_virt_kiq_wreg(adev, reg, v);
 251
 252        if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 253                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 254        else {
 255                unsigned long flags;
 256
 257                spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 258                writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 259                writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 260                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 261        }
 262
 263        if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
 264                udelay(500);
 265        }
 266}
 267
 268/**
 269 * amdgpu_io_rreg - read an IO register
 270 *
 271 * @adev: amdgpu_device pointer
 272 * @reg: dword aligned register offset
 273 *
 274 * Returns the 32 bit value from the offset specified.
 275 */
 276u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 277{
 278        if ((reg * 4) < adev->rio_mem_size)
 279                return ioread32(adev->rio_mem + (reg * 4));
 280        else {
 281                iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
 282                return ioread32(adev->rio_mem + (mmMM_DATA * 4));
 283        }
 284}
 285
 286/**
 287 * amdgpu_io_wreg - write to an IO register
 288 *
 289 * @adev: amdgpu_device pointer
 290 * @reg: dword aligned register offset
 291 * @v: 32 bit value to write to the register
 292 *
 293 * Writes the value specified to the offset specified.
 294 */
 295void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 296{
 297        if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
 298                adev->last_mm_index = v;
 299        }
 300
 301        if ((reg * 4) < adev->rio_mem_size)
 302                iowrite32(v, adev->rio_mem + (reg * 4));
 303        else {
 304                iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
 305                iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
 306        }
 307
 308        if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
 309                udelay(500);
 310        }
 311}
 312
 313/**
 314 * amdgpu_mm_rdoorbell - read a doorbell dword
 315 *
 316 * @adev: amdgpu_device pointer
 317 * @index: doorbell index
 318 *
 319 * Returns the value in the doorbell aperture at the
 320 * requested doorbell index (CIK).
 321 */
 322u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
 323{
 324        if (index < adev->doorbell.num_doorbells) {
 325                return readl(adev->doorbell.ptr + index);
 326        } else {
 327                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 328                return 0;
 329        }
 330}
 331
 332/**
 333 * amdgpu_mm_wdoorbell - write a doorbell dword
 334 *
 335 * @adev: amdgpu_device pointer
 336 * @index: doorbell index
 337 * @v: value to write
 338 *
 339 * Writes @v to the doorbell aperture at the
 340 * requested doorbell index (CIK).
 341 */
 342void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 343{
 344        if (index < adev->doorbell.num_doorbells) {
 345                writel(v, adev->doorbell.ptr + index);
 346        } else {
 347                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 348        }
 349}
 350
 351/**
 352 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 353 *
 354 * @adev: amdgpu_device pointer
 355 * @index: doorbell index
 356 *
 357 * Returns the value in the doorbell aperture at the
 358 * requested doorbell index (VEGA10+).
 359 */
 360u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
 361{
 362        if (index < adev->doorbell.num_doorbells) {
 363                return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
 364        } else {
 365                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 366                return 0;
 367        }
 368}
 369
 370/**
 371 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 372 *
 373 * @adev: amdgpu_device pointer
 374 * @index: doorbell index
 375 * @v: value to write
 376 *
 377 * Writes @v to the doorbell aperture at the
 378 * requested doorbell index (VEGA10+).
 379 */
 380void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
 381{
 382        if (index < adev->doorbell.num_doorbells) {
 383                atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
 384        } else {
 385                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 386        }
 387}
 388
 389/**
 390 * amdgpu_invalid_rreg - dummy reg read function
 391 *
 392 * @adev: amdgpu device pointer
 393 * @reg: offset of register
 394 *
 395 * Dummy register read function.  Used for register blocks
 396 * that certain asics don't have (all asics).
 397 * Returns the value in the register.
 398 */
 399static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
 400{
 401        DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
 402        BUG();
 403        return 0;
 404}
 405
 406/**
 407 * amdgpu_invalid_wreg - dummy reg write function
 408 *
 409 * @adev: amdgpu device pointer
 410 * @reg: offset of register
 411 * @v: value to write to the register
 412 *
 413 * Dummy register read function.  Used for register blocks
 414 * that certain asics don't have (all asics).
 415 */
 416static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 417{
 418        DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
 419                  reg, v);
 420        BUG();
 421}
 422
 423/**
 424 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 425 *
 426 * @adev: amdgpu device pointer
 427 * @reg: offset of register
 428 *
 429 * Dummy register read function.  Used for register blocks
 430 * that certain asics don't have (all asics).
 431 * Returns the value in the register.
 432 */
 433static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
 434{
 435        DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
 436        BUG();
 437        return 0;
 438}
 439
 440/**
 441 * amdgpu_invalid_wreg64 - dummy reg write function
 442 *
 443 * @adev: amdgpu device pointer
 444 * @reg: offset of register
 445 * @v: value to write to the register
 446 *
 447 * Dummy register read function.  Used for register blocks
 448 * that certain asics don't have (all asics).
 449 */
 450static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
 451{
 452        DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
 453                  reg, v);
 454        BUG();
 455}
 456
 457/**
 458 * amdgpu_block_invalid_rreg - dummy reg read function
 459 *
 460 * @adev: amdgpu device pointer
 461 * @block: offset of instance
 462 * @reg: offset of register
 463 *
 464 * Dummy register read function.  Used for register blocks
 465 * that certain asics don't have (all asics).
 466 * Returns the value in the register.
 467 */
 468static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
 469                                          uint32_t block, uint32_t reg)
 470{
 471        DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
 472                  reg, block);
 473        BUG();
 474        return 0;
 475}
 476
 477/**
 478 * amdgpu_block_invalid_wreg - dummy reg write function
 479 *
 480 * @adev: amdgpu device pointer
 481 * @block: offset of instance
 482 * @reg: offset of register
 483 * @v: value to write to the register
 484 *
 485 * Dummy register read function.  Used for register blocks
 486 * that certain asics don't have (all asics).
 487 */
 488static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 489                                      uint32_t block,
 490                                      uint32_t reg, uint32_t v)
 491{
 492        DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
 493                  reg, block, v);
 494        BUG();
 495}
 496
 497/**
 498 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 499 *
 500 * @adev: amdgpu device pointer
 501 *
 502 * Allocates a scratch page of VRAM for use by various things in the
 503 * driver.
 504 */
 505static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 506{
 507        return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
 508                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
 509                                       &adev->vram_scratch.robj,
 510                                       &adev->vram_scratch.gpu_addr,
 511                                       (void **)&adev->vram_scratch.ptr);
 512}
 513
 514/**
 515 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 516 *
 517 * @adev: amdgpu device pointer
 518 *
 519 * Frees the VRAM scratch page.
 520 */
 521static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
 522{
 523        amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
 524}
 525
 526/**
 527 * amdgpu_device_program_register_sequence - program an array of registers.
 528 *
 529 * @adev: amdgpu_device pointer
 530 * @registers: pointer to the register array
 531 * @array_size: size of the register array
 532 *
 533 * Programs an array or registers with and and or masks.
 534 * This is a helper for setting golden registers.
 535 */
 536void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 537                                             const u32 *registers,
 538                                             const u32 array_size)
 539{
 540        u32 tmp, reg, and_mask, or_mask;
 541        int i;
 542
 543        if (array_size % 3)
 544                return;
 545
 546        for (i = 0; i < array_size; i +=3) {
 547                reg = registers[i + 0];
 548                and_mask = registers[i + 1];
 549                or_mask = registers[i + 2];
 550
 551                if (and_mask == 0xffffffff) {
 552                        tmp = or_mask;
 553                } else {
 554                        tmp = RREG32(reg);
 555                        tmp &= ~and_mask;
 556                        if (adev->family >= AMDGPU_FAMILY_AI)
 557                                tmp |= (or_mask & and_mask);
 558                        else
 559                                tmp |= or_mask;
 560                }
 561                WREG32(reg, tmp);
 562        }
 563}
 564
 565/**
 566 * amdgpu_device_pci_config_reset - reset the GPU
 567 *
 568 * @adev: amdgpu_device pointer
 569 *
 570 * Resets the GPU using the pci config reset sequence.
 571 * Only applicable to asics prior to vega10.
 572 */
 573void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
 574{
 575        pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
 576}
 577
 578/*
 579 * GPU doorbell aperture helpers function.
 580 */
 581/**
 582 * amdgpu_device_doorbell_init - Init doorbell driver information.
 583 *
 584 * @adev: amdgpu_device pointer
 585 *
 586 * Init doorbell driver information (CIK)
 587 * Returns 0 on success, error on failure.
 588 */
 589static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
 590{
 591
 592        /* No doorbell on SI hardware generation */
 593        if (adev->asic_type < CHIP_BONAIRE) {
 594                adev->doorbell.base = 0;
 595                adev->doorbell.size = 0;
 596                adev->doorbell.num_doorbells = 0;
 597                adev->doorbell.ptr = NULL;
 598                return 0;
 599        }
 600
 601        if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
 602                return -EINVAL;
 603
 604        amdgpu_asic_init_doorbell_index(adev);
 605
 606        /* doorbell bar mapping */
 607        adev->doorbell.base = pci_resource_start(adev->pdev, 2);
 608        adev->doorbell.size = pci_resource_len(adev->pdev, 2);
 609
 610        adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
 611                                             adev->doorbell_index.max_assignment+1);
 612        if (adev->doorbell.num_doorbells == 0)
 613                return -EINVAL;
 614
 615        /* For Vega, reserve and map two pages on doorbell BAR since SDMA
 616         * paging queue doorbell use the second page. The
 617         * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
 618         * doorbells are in the first page. So with paging queue enabled,
 619         * the max num_doorbells should + 1 page (0x400 in dword)
 620         */
 621        if (adev->asic_type >= CHIP_VEGA10)
 622                adev->doorbell.num_doorbells += 0x400;
 623
 624        adev->doorbell.ptr = ioremap(adev->doorbell.base,
 625                                     adev->doorbell.num_doorbells *
 626                                     sizeof(u32));
 627        if (adev->doorbell.ptr == NULL)
 628                return -ENOMEM;
 629
 630        return 0;
 631}
 632
 633/**
 634 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 635 *
 636 * @adev: amdgpu_device pointer
 637 *
 638 * Tear down doorbell driver information (CIK)
 639 */
 640static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
 641{
 642        iounmap(adev->doorbell.ptr);
 643        adev->doorbell.ptr = NULL;
 644}
 645
 646
 647
 648/*
 649 * amdgpu_device_wb_*()
 650 * Writeback is the method by which the GPU updates special pages in memory
 651 * with the status of certain GPU events (fences, ring pointers,etc.).
 652 */
 653
 654/**
 655 * amdgpu_device_wb_fini - Disable Writeback and free memory
 656 *
 657 * @adev: amdgpu_device pointer
 658 *
 659 * Disables Writeback and frees the Writeback memory (all asics).
 660 * Used at driver shutdown.
 661 */
 662static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
 663{
 664        if (adev->wb.wb_obj) {
 665                amdgpu_bo_free_kernel(&adev->wb.wb_obj,
 666                                      &adev->wb.gpu_addr,
 667                                      (void **)&adev->wb.wb);
 668                adev->wb.wb_obj = NULL;
 669        }
 670}
 671
 672/**
 673 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
 674 *
 675 * @adev: amdgpu_device pointer
 676 *
 677 * Initializes writeback and allocates writeback memory (all asics).
 678 * Used at driver startup.
 679 * Returns 0 on success or an -error on failure.
 680 */
 681static int amdgpu_device_wb_init(struct amdgpu_device *adev)
 682{
 683        int r;
 684
 685        if (adev->wb.wb_obj == NULL) {
 686                /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
 687                r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
 688                                            PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
 689                                            &adev->wb.wb_obj, &adev->wb.gpu_addr,
 690                                            (void **)&adev->wb.wb);
 691                if (r) {
 692                        dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
 693                        return r;
 694                }
 695
 696                adev->wb.num_wb = AMDGPU_MAX_WB;
 697                memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 698
 699                /* clear wb memory */
 700                memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
 701        }
 702
 703        return 0;
 704}
 705
 706/**
 707 * amdgpu_device_wb_get - Allocate a wb entry
 708 *
 709 * @adev: amdgpu_device pointer
 710 * @wb: wb index
 711 *
 712 * Allocate a wb slot for use by the driver (all asics).
 713 * Returns 0 on success or -EINVAL on failure.
 714 */
 715int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
 716{
 717        unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
 718
 719        if (offset < adev->wb.num_wb) {
 720                __set_bit(offset, adev->wb.used);
 721                *wb = offset << 3; /* convert to dw offset */
 722                return 0;
 723        } else {
 724                return -EINVAL;
 725        }
 726}
 727
 728/**
 729 * amdgpu_device_wb_free - Free a wb entry
 730 *
 731 * @adev: amdgpu_device pointer
 732 * @wb: wb index
 733 *
 734 * Free a wb slot allocated for use by the driver (all asics)
 735 */
 736void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 737{
 738        wb >>= 3;
 739        if (wb < adev->wb.num_wb)
 740                __clear_bit(wb, adev->wb.used);
 741}
 742
 743/**
 744 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 745 *
 746 * @adev: amdgpu_device pointer
 747 *
 748 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 749 * to fail, but if any of the BARs is not accessible after the size we abort
 750 * driver loading by returning -ENODEV.
 751 */
 752int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 753{
 754        u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
 755        u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
 756        struct pci_bus *root;
 757        struct resource *res;
 758        unsigned i;
 759        u16 cmd;
 760        int r;
 761
 762        /* Bypass for VF */
 763        if (amdgpu_sriov_vf(adev))
 764                return 0;
 765
 766        /* Check if the root BUS has 64bit memory resources */
 767        root = adev->pdev->bus;
 768        while (root->parent)
 769                root = root->parent;
 770
 771        pci_bus_for_each_resource(root, res, i) {
 772                if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
 773                    res->start > 0x100000000ull)
 774                        break;
 775        }
 776
 777        /* Trying to resize is pointless without a root hub window above 4GB */
 778        if (!res)
 779                return 0;
 780
 781        /* Disable memory decoding while we change the BAR addresses and size */
 782        pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
 783        pci_write_config_word(adev->pdev, PCI_COMMAND,
 784                              cmd & ~PCI_COMMAND_MEMORY);
 785
 786        /* Free the VRAM and doorbell BAR, we most likely need to move both. */
 787        amdgpu_device_doorbell_fini(adev);
 788        if (adev->asic_type >= CHIP_BONAIRE)
 789                pci_release_resource(adev->pdev, 2);
 790
 791        pci_release_resource(adev->pdev, 0);
 792
 793        r = pci_resize_resource(adev->pdev, 0, rbar_size);
 794        if (r == -ENOSPC)
 795                DRM_INFO("Not enough PCI address space for a large BAR.");
 796        else if (r && r != -ENOTSUPP)
 797                DRM_ERROR("Problem resizing BAR0 (%d).", r);
 798
 799        pci_assign_unassigned_bus_resources(adev->pdev->bus);
 800
 801        /* When the doorbell or fb BAR isn't available we have no chance of
 802         * using the device.
 803         */
 804        r = amdgpu_device_doorbell_init(adev);
 805        if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
 806                return -ENODEV;
 807
 808        pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
 809
 810        return 0;
 811}
 812
 813/*
 814 * GPU helpers function.
 815 */
 816/**
 817 * amdgpu_device_need_post - check if the hw need post or not
 818 *
 819 * @adev: amdgpu_device pointer
 820 *
 821 * Check if the asic has been initialized (all asics) at driver startup
 822 * or post is needed if  hw reset is performed.
 823 * Returns true if need or false if not.
 824 */
 825bool amdgpu_device_need_post(struct amdgpu_device *adev)
 826{
 827        uint32_t reg;
 828
 829        if (amdgpu_sriov_vf(adev))
 830                return false;
 831
 832        if (amdgpu_passthrough(adev)) {
 833                /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
 834                 * some old smc fw still need driver do vPost otherwise gpu hang, while
 835                 * those smc fw version above 22.15 doesn't have this flaw, so we force
 836                 * vpost executed for smc version below 22.15
 837                 */
 838                if (adev->asic_type == CHIP_FIJI) {
 839                        int err;
 840                        uint32_t fw_ver;
 841                        err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
 842                        /* force vPost if error occured */
 843                        if (err)
 844                                return true;
 845
 846                        fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
 847                        if (fw_ver < 0x00160e00)
 848                                return true;
 849                }
 850        }
 851
 852        if (adev->has_hw_reset) {
 853                adev->has_hw_reset = false;
 854                return true;
 855        }
 856
 857        /* bios scratch used on CIK+ */
 858        if (adev->asic_type >= CHIP_BONAIRE)
 859                return amdgpu_atombios_scratch_need_asic_init(adev);
 860
 861        /* check MEM_SIZE for older asics */
 862        reg = amdgpu_asic_get_config_memsize(adev);
 863
 864        if ((reg != 0) && (reg != 0xffffffff))
 865                return false;
 866
 867        return true;
 868}
 869
 870/* if we get transitioned to only one device, take VGA back */
 871/**
 872 * amdgpu_device_vga_set_decode - enable/disable vga decode
 873 *
 874 * @cookie: amdgpu_device pointer
 875 * @state: enable/disable vga decode
 876 *
 877 * Enable/disable vga decode (all asics).
 878 * Returns VGA resource flags.
 879 */
 880static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
 881{
 882        struct amdgpu_device *adev = cookie;
 883        amdgpu_asic_set_vga_state(adev, state);
 884        if (state)
 885                return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
 886                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 887        else
 888                return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 889}
 890
 891/**
 892 * amdgpu_device_check_block_size - validate the vm block size
 893 *
 894 * @adev: amdgpu_device pointer
 895 *
 896 * Validates the vm block size specified via module parameter.
 897 * The vm block size defines number of bits in page table versus page directory,
 898 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 899 * page table and the remaining bits are in the page directory.
 900 */
 901static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
 902{
 903        /* defines number of bits in page table versus page directory,
 904         * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 905         * page table and the remaining bits are in the page directory */
 906        if (amdgpu_vm_block_size == -1)
 907                return;
 908
 909        if (amdgpu_vm_block_size < 9) {
 910                dev_warn(adev->dev, "VM page table size (%d) too small\n",
 911                         amdgpu_vm_block_size);
 912                amdgpu_vm_block_size = -1;
 913        }
 914}
 915
 916/**
 917 * amdgpu_device_check_vm_size - validate the vm size
 918 *
 919 * @adev: amdgpu_device pointer
 920 *
 921 * Validates the vm size in GB specified via module parameter.
 922 * The VM size is the size of the GPU virtual memory space in GB.
 923 */
 924static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
 925{
 926        /* no need to check the default value */
 927        if (amdgpu_vm_size == -1)
 928                return;
 929
 930        if (amdgpu_vm_size < 1) {
 931                dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
 932                         amdgpu_vm_size);
 933                amdgpu_vm_size = -1;
 934        }
 935}
 936
 937static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
 938{
 939        struct sysinfo si;
 940        bool is_os_64 = (sizeof(void *) == 8) ? true : false;
 941        uint64_t total_memory;
 942        uint64_t dram_size_seven_GB = 0x1B8000000;
 943        uint64_t dram_size_three_GB = 0xB8000000;
 944
 945        if (amdgpu_smu_memory_pool_size == 0)
 946                return;
 947
 948        if (!is_os_64) {
 949                DRM_WARN("Not 64-bit OS, feature not supported\n");
 950                goto def_value;
 951        }
 952        si_meminfo(&si);
 953        total_memory = (uint64_t)si.totalram * si.mem_unit;
 954
 955        if ((amdgpu_smu_memory_pool_size == 1) ||
 956                (amdgpu_smu_memory_pool_size == 2)) {
 957                if (total_memory < dram_size_three_GB)
 958                        goto def_value1;
 959        } else if ((amdgpu_smu_memory_pool_size == 4) ||
 960                (amdgpu_smu_memory_pool_size == 8)) {
 961                if (total_memory < dram_size_seven_GB)
 962                        goto def_value1;
 963        } else {
 964                DRM_WARN("Smu memory pool size not supported\n");
 965                goto def_value;
 966        }
 967        adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
 968
 969        return;
 970
 971def_value1:
 972        DRM_WARN("No enough system memory\n");
 973def_value:
 974        adev->pm.smu_prv_buffer_size = 0;
 975}
 976
 977/**
 978 * amdgpu_device_check_arguments - validate module params
 979 *
 980 * @adev: amdgpu_device pointer
 981 *
 982 * Validates certain module parameters and updates
 983 * the associated values used by the driver (all asics).
 984 */
 985static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 986{
 987        int ret = 0;
 988
 989        if (amdgpu_sched_jobs < 4) {
 990                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
 991                         amdgpu_sched_jobs);
 992                amdgpu_sched_jobs = 4;
 993        } else if (!is_power_of_2(amdgpu_sched_jobs)){
 994                dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
 995                         amdgpu_sched_jobs);
 996                amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
 997        }
 998
 999        if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1000                /* gart size must be greater or equal to 32M */
1001                dev_warn(adev->dev, "gart size (%d) too small\n",
1002                         amdgpu_gart_size);
1003                amdgpu_gart_size = -1;
1004        }
1005
1006        if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1007                /* gtt size must be greater or equal to 32M */
1008                dev_warn(adev->dev, "gtt size (%d) too small\n",
1009                                 amdgpu_gtt_size);
1010                amdgpu_gtt_size = -1;
1011        }
1012
1013        /* valid range is between 4 and 9 inclusive */
1014        if (amdgpu_vm_fragment_size != -1 &&
1015            (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1016                dev_warn(adev->dev, "valid range is between 4 and 9\n");
1017                amdgpu_vm_fragment_size = -1;
1018        }
1019
1020        amdgpu_device_check_smu_prv_buffer_size(adev);
1021
1022        amdgpu_device_check_vm_size(adev);
1023
1024        amdgpu_device_check_block_size(adev);
1025
1026        ret = amdgpu_device_get_job_timeout_settings(adev);
1027        if (ret) {
1028                dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
1029                return ret;
1030        }
1031
1032        adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1033
1034        return ret;
1035}
1036
1037/**
1038 * amdgpu_switcheroo_set_state - set switcheroo state
1039 *
1040 * @pdev: pci dev pointer
1041 * @state: vga_switcheroo state
1042 *
1043 * Callback for the switcheroo driver.  Suspends or resumes the
1044 * the asics before or after it is powered up using ACPI methods.
1045 */
1046static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1047{
1048        struct drm_device *dev = pci_get_drvdata(pdev);
1049
1050        if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1051                return;
1052
1053        if (state == VGA_SWITCHEROO_ON) {
1054                pr_info("amdgpu: switched on\n");
1055                /* don't suspend or resume card normally */
1056                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1057
1058                amdgpu_device_resume(dev, true, true);
1059
1060                dev->switch_power_state = DRM_SWITCH_POWER_ON;
1061                drm_kms_helper_poll_enable(dev);
1062        } else {
1063                pr_info("amdgpu: switched off\n");
1064                drm_kms_helper_poll_disable(dev);
1065                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1066                amdgpu_device_suspend(dev, true, true);
1067                dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1068        }
1069}
1070
1071/**
1072 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1073 *
1074 * @pdev: pci dev pointer
1075 *
1076 * Callback for the switcheroo driver.  Check of the switcheroo
1077 * state can be changed.
1078 * Returns true if the state can be changed, false if not.
1079 */
1080static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1081{
1082        struct drm_device *dev = pci_get_drvdata(pdev);
1083
1084        /*
1085        * FIXME: open_count is protected by drm_global_mutex but that would lead to
1086        * locking inversion with the driver load path. And the access here is
1087        * completely racy anyway. So don't bother with locking for now.
1088        */
1089        return dev->open_count == 0;
1090}
1091
1092static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1093        .set_gpu_state = amdgpu_switcheroo_set_state,
1094        .reprobe = NULL,
1095        .can_switch = amdgpu_switcheroo_can_switch,
1096};
1097
1098/**
1099 * amdgpu_device_ip_set_clockgating_state - set the CG state
1100 *
1101 * @dev: amdgpu_device pointer
1102 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1103 * @state: clockgating state (gate or ungate)
1104 *
1105 * Sets the requested clockgating state for all instances of
1106 * the hardware IP specified.
1107 * Returns the error code from the last instance.
1108 */
1109int amdgpu_device_ip_set_clockgating_state(void *dev,
1110                                           enum amd_ip_block_type block_type,
1111                                           enum amd_clockgating_state state)
1112{
1113        struct amdgpu_device *adev = dev;
1114        int i, r = 0;
1115
1116        for (i = 0; i < adev->num_ip_blocks; i++) {
1117                if (!adev->ip_blocks[i].status.valid)
1118                        continue;
1119                if (adev->ip_blocks[i].version->type != block_type)
1120                        continue;
1121                if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1122                        continue;
1123                r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1124                        (void *)adev, state);
1125                if (r)
1126                        DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1127                                  adev->ip_blocks[i].version->funcs->name, r);
1128        }
1129        return r;
1130}
1131
1132/**
1133 * amdgpu_device_ip_set_powergating_state - set the PG state
1134 *
1135 * @dev: amdgpu_device pointer
1136 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1137 * @state: powergating state (gate or ungate)
1138 *
1139 * Sets the requested powergating state for all instances of
1140 * the hardware IP specified.
1141 * Returns the error code from the last instance.
1142 */
1143int amdgpu_device_ip_set_powergating_state(void *dev,
1144                                           enum amd_ip_block_type block_type,
1145                                           enum amd_powergating_state state)
1146{
1147        struct amdgpu_device *adev = dev;
1148        int i, r = 0;
1149
1150        for (i = 0; i < adev->num_ip_blocks; i++) {
1151                if (!adev->ip_blocks[i].status.valid)
1152                        continue;
1153                if (adev->ip_blocks[i].version->type != block_type)
1154                        continue;
1155                if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1156                        continue;
1157                r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1158                        (void *)adev, state);
1159                if (r)
1160                        DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1161                                  adev->ip_blocks[i].version->funcs->name, r);
1162        }
1163        return r;
1164}
1165
1166/**
1167 * amdgpu_device_ip_get_clockgating_state - get the CG state
1168 *
1169 * @adev: amdgpu_device pointer
1170 * @flags: clockgating feature flags
1171 *
1172 * Walks the list of IPs on the device and updates the clockgating
1173 * flags for each IP.
1174 * Updates @flags with the feature flags for each hardware IP where
1175 * clockgating is enabled.
1176 */
1177void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1178                                            u32 *flags)
1179{
1180        int i;
1181
1182        for (i = 0; i < adev->num_ip_blocks; i++) {
1183                if (!adev->ip_blocks[i].status.valid)
1184                        continue;
1185                if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1186                        adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1187        }
1188}
1189
1190/**
1191 * amdgpu_device_ip_wait_for_idle - wait for idle
1192 *
1193 * @adev: amdgpu_device pointer
1194 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1195 *
1196 * Waits for the request hardware IP to be idle.
1197 * Returns 0 for success or a negative error code on failure.
1198 */
1199int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1200                                   enum amd_ip_block_type block_type)
1201{
1202        int i, r;
1203
1204        for (i = 0; i < adev->num_ip_blocks; i++) {
1205                if (!adev->ip_blocks[i].status.valid)
1206                        continue;
1207                if (adev->ip_blocks[i].version->type == block_type) {
1208                        r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1209                        if (r)
1210                                return r;
1211                        break;
1212                }
1213        }
1214        return 0;
1215
1216}
1217
1218/**
1219 * amdgpu_device_ip_is_idle - is the hardware IP idle
1220 *
1221 * @adev: amdgpu_device pointer
1222 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1223 *
1224 * Check if the hardware IP is idle or not.
1225 * Returns true if it the IP is idle, false if not.
1226 */
1227bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1228                              enum amd_ip_block_type block_type)
1229{
1230        int i;
1231
1232        for (i = 0; i < adev->num_ip_blocks; i++) {
1233                if (!adev->ip_blocks[i].status.valid)
1234                        continue;
1235                if (adev->ip_blocks[i].version->type == block_type)
1236                        return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1237        }
1238        return true;
1239
1240}
1241
1242/**
1243 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1244 *
1245 * @adev: amdgpu_device pointer
1246 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1247 *
1248 * Returns a pointer to the hardware IP block structure
1249 * if it exists for the asic, otherwise NULL.
1250 */
1251struct amdgpu_ip_block *
1252amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1253                              enum amd_ip_block_type type)
1254{
1255        int i;
1256
1257        for (i = 0; i < adev->num_ip_blocks; i++)
1258                if (adev->ip_blocks[i].version->type == type)
1259                        return &adev->ip_blocks[i];
1260
1261        return NULL;
1262}
1263
1264/**
1265 * amdgpu_device_ip_block_version_cmp
1266 *
1267 * @adev: amdgpu_device pointer
1268 * @type: enum amd_ip_block_type
1269 * @major: major version
1270 * @minor: minor version
1271 *
1272 * return 0 if equal or greater
1273 * return 1 if smaller or the ip_block doesn't exist
1274 */
1275int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1276                                       enum amd_ip_block_type type,
1277                                       u32 major, u32 minor)
1278{
1279        struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1280
1281        if (ip_block && ((ip_block->version->major > major) ||
1282                        ((ip_block->version->major == major) &&
1283                        (ip_block->version->minor >= minor))))
1284                return 0;
1285
1286        return 1;
1287}
1288
1289/**
1290 * amdgpu_device_ip_block_add
1291 *
1292 * @adev: amdgpu_device pointer
1293 * @ip_block_version: pointer to the IP to add
1294 *
1295 * Adds the IP block driver information to the collection of IPs
1296 * on the asic.
1297 */
1298int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1299                               const struct amdgpu_ip_block_version *ip_block_version)
1300{
1301        if (!ip_block_version)
1302                return -EINVAL;
1303
1304        DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1305                  ip_block_version->funcs->name);
1306
1307        adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1308
1309        return 0;
1310}
1311
1312/**
1313 * amdgpu_device_enable_virtual_display - enable virtual display feature
1314 *
1315 * @adev: amdgpu_device pointer
1316 *
1317 * Enabled the virtual display feature if the user has enabled it via
1318 * the module parameter virtual_display.  This feature provides a virtual
1319 * display hardware on headless boards or in virtualized environments.
1320 * This function parses and validates the configuration string specified by
1321 * the user and configues the virtual display configuration (number of
1322 * virtual connectors, crtcs, etc.) specified.
1323 */
1324static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1325{
1326        adev->enable_virtual_display = false;
1327
1328        if (amdgpu_virtual_display) {
1329                struct drm_device *ddev = adev->ddev;
1330                const char *pci_address_name = pci_name(ddev->pdev);
1331                char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1332
1333                pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1334                pciaddstr_tmp = pciaddstr;
1335                while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1336                        pciaddname = strsep(&pciaddname_tmp, ",");
1337                        if (!strcmp("all", pciaddname)
1338                            || !strcmp(pci_address_name, pciaddname)) {
1339                                long num_crtc;
1340                                int res = -1;
1341
1342                                adev->enable_virtual_display = true;
1343
1344                                if (pciaddname_tmp)
1345                                        res = kstrtol(pciaddname_tmp, 10,
1346                                                      &num_crtc);
1347
1348                                if (!res) {
1349                                        if (num_crtc < 1)
1350                                                num_crtc = 1;
1351                                        if (num_crtc > 6)
1352                                                num_crtc = 6;
1353                                        adev->mode_info.num_crtc = num_crtc;
1354                                } else {
1355                                        adev->mode_info.num_crtc = 1;
1356                                }
1357                                break;
1358                        }
1359                }
1360
1361                DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1362                         amdgpu_virtual_display, pci_address_name,
1363                         adev->enable_virtual_display, adev->mode_info.num_crtc);
1364
1365                kfree(pciaddstr);
1366        }
1367}
1368
1369/**
1370 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1371 *
1372 * @adev: amdgpu_device pointer
1373 *
1374 * Parses the asic configuration parameters specified in the gpu info
1375 * firmware and makes them availale to the driver for use in configuring
1376 * the asic.
1377 * Returns 0 on success, -EINVAL on failure.
1378 */
1379static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1380{
1381        const char *chip_name;
1382        char fw_name[30];
1383        int err;
1384        const struct gpu_info_firmware_header_v1_0 *hdr;
1385
1386        adev->firmware.gpu_info_fw = NULL;
1387
1388        switch (adev->asic_type) {
1389        case CHIP_TOPAZ:
1390        case CHIP_TONGA:
1391        case CHIP_FIJI:
1392        case CHIP_POLARIS10:
1393        case CHIP_POLARIS11:
1394        case CHIP_POLARIS12:
1395        case CHIP_VEGAM:
1396        case CHIP_CARRIZO:
1397        case CHIP_STONEY:
1398#ifdef CONFIG_DRM_AMDGPU_SI
1399        case CHIP_VERDE:
1400        case CHIP_TAHITI:
1401        case CHIP_PITCAIRN:
1402        case CHIP_OLAND:
1403        case CHIP_HAINAN:
1404#endif
1405#ifdef CONFIG_DRM_AMDGPU_CIK
1406        case CHIP_BONAIRE:
1407        case CHIP_HAWAII:
1408        case CHIP_KAVERI:
1409        case CHIP_KABINI:
1410        case CHIP_MULLINS:
1411#endif
1412        case CHIP_VEGA20:
1413        default:
1414                return 0;
1415        case CHIP_VEGA10:
1416                chip_name = "vega10";
1417                break;
1418        case CHIP_VEGA12:
1419                chip_name = "vega12";
1420                break;
1421        case CHIP_RAVEN:
1422                if (adev->rev_id >= 8)
1423                        chip_name = "raven2";
1424                else if (adev->pdev->device == 0x15d8)
1425                        chip_name = "picasso";
1426                else
1427                        chip_name = "raven";
1428                break;
1429        case CHIP_ARCTURUS:
1430                chip_name = "arcturus";
1431                break;
1432        case CHIP_RENOIR:
1433                chip_name = "renoir";
1434                break;
1435        case CHIP_NAVI10:
1436                chip_name = "navi10";
1437                break;
1438        case CHIP_NAVI14:
1439                chip_name = "navi14";
1440                break;
1441        case CHIP_NAVI12:
1442                chip_name = "navi12";
1443                break;
1444        }
1445
1446        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1447        err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1448        if (err) {
1449                dev_err(adev->dev,
1450                        "Failed to load gpu_info firmware \"%s\"\n",
1451                        fw_name);
1452                goto out;
1453        }
1454        err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1455        if (err) {
1456                dev_err(adev->dev,
1457                        "Failed to validate gpu_info firmware \"%s\"\n",
1458                        fw_name);
1459                goto out;
1460        }
1461
1462        hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1463        amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1464
1465        switch (hdr->version_major) {
1466        case 1:
1467        {
1468                const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1469                        (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1470                                                                le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1471
1472                adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1473                adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1474                adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1475                adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1476                adev->gfx.config.max_texture_channel_caches =
1477                        le32_to_cpu(gpu_info_fw->gc_num_tccs);
1478                adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1479                adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1480                adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1481                adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1482                adev->gfx.config.double_offchip_lds_buf =
1483                        le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1484                adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1485                adev->gfx.cu_info.max_waves_per_simd =
1486                        le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1487                adev->gfx.cu_info.max_scratch_slots_per_cu =
1488                        le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1489                adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1490                if (hdr->version_minor >= 1) {
1491                        const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1492                                (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1493                                                                        le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1494                        adev->gfx.config.num_sc_per_sh =
1495                                le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1496                        adev->gfx.config.num_packer_per_sc =
1497                                le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1498                }
1499#ifdef CONFIG_DRM_AMD_DC_DCN2_0
1500                if (hdr->version_minor == 2) {
1501                        const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1502                                (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1503                                                                        le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1504                        adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1505                }
1506#endif
1507                break;
1508        }
1509        default:
1510                dev_err(adev->dev,
1511                        "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1512                err = -EINVAL;
1513                goto out;
1514        }
1515out:
1516        return err;
1517}
1518
1519/**
1520 * amdgpu_device_ip_early_init - run early init for hardware IPs
1521 *
1522 * @adev: amdgpu_device pointer
1523 *
1524 * Early initialization pass for hardware IPs.  The hardware IPs that make
1525 * up each asic are discovered each IP's early_init callback is run.  This
1526 * is the first stage in initializing the asic.
1527 * Returns 0 on success, negative error code on failure.
1528 */
1529static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1530{
1531        int i, r;
1532
1533        amdgpu_device_enable_virtual_display(adev);
1534
1535        switch (adev->asic_type) {
1536        case CHIP_TOPAZ:
1537        case CHIP_TONGA:
1538        case CHIP_FIJI:
1539        case CHIP_POLARIS10:
1540        case CHIP_POLARIS11:
1541        case CHIP_POLARIS12:
1542        case CHIP_VEGAM:
1543        case CHIP_CARRIZO:
1544        case CHIP_STONEY:
1545                if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1546                        adev->family = AMDGPU_FAMILY_CZ;
1547                else
1548                        adev->family = AMDGPU_FAMILY_VI;
1549
1550                r = vi_set_ip_blocks(adev);
1551                if (r)
1552                        return r;
1553                break;
1554#ifdef CONFIG_DRM_AMDGPU_SI
1555        case CHIP_VERDE:
1556        case CHIP_TAHITI:
1557        case CHIP_PITCAIRN:
1558        case CHIP_OLAND:
1559        case CHIP_HAINAN:
1560                adev->family = AMDGPU_FAMILY_SI;
1561                r = si_set_ip_blocks(adev);
1562                if (r)
1563                        return r;
1564                break;
1565#endif
1566#ifdef CONFIG_DRM_AMDGPU_CIK
1567        case CHIP_BONAIRE:
1568        case CHIP_HAWAII:
1569        case CHIP_KAVERI:
1570        case CHIP_KABINI:
1571        case CHIP_MULLINS:
1572                if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1573                        adev->family = AMDGPU_FAMILY_CI;
1574                else
1575                        adev->family = AMDGPU_FAMILY_KV;
1576
1577                r = cik_set_ip_blocks(adev);
1578                if (r)
1579                        return r;
1580                break;
1581#endif
1582        case CHIP_VEGA10:
1583        case CHIP_VEGA12:
1584        case CHIP_VEGA20:
1585        case CHIP_RAVEN:
1586        case CHIP_ARCTURUS:
1587        case CHIP_RENOIR:
1588                if (adev->asic_type == CHIP_RAVEN ||
1589                    adev->asic_type == CHIP_RENOIR)
1590                        adev->family = AMDGPU_FAMILY_RV;
1591                else
1592                        adev->family = AMDGPU_FAMILY_AI;
1593
1594                r = soc15_set_ip_blocks(adev);
1595                if (r)
1596                        return r;
1597                break;
1598        case  CHIP_NAVI10:
1599        case  CHIP_NAVI14:
1600        case  CHIP_NAVI12:
1601                adev->family = AMDGPU_FAMILY_NV;
1602
1603                r = nv_set_ip_blocks(adev);
1604                if (r)
1605                        return r;
1606                break;
1607        default:
1608                /* FIXME: not supported yet */
1609                return -EINVAL;
1610        }
1611
1612        r = amdgpu_device_parse_gpu_info_fw(adev);
1613        if (r)
1614                return r;
1615
1616        amdgpu_amdkfd_device_probe(adev);
1617
1618        if (amdgpu_sriov_vf(adev)) {
1619                r = amdgpu_virt_request_full_gpu(adev, true);
1620                if (r)
1621                        return -EAGAIN;
1622        }
1623
1624        adev->pm.pp_feature = amdgpu_pp_feature_mask;
1625        if (amdgpu_sriov_vf(adev))
1626                adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1627
1628        for (i = 0; i < adev->num_ip_blocks; i++) {
1629                if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1630                        DRM_ERROR("disabled ip block: %d <%s>\n",
1631                                  i, adev->ip_blocks[i].version->funcs->name);
1632                        adev->ip_blocks[i].status.valid = false;
1633                } else {
1634                        if (adev->ip_blocks[i].version->funcs->early_init) {
1635                                r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1636                                if (r == -ENOENT) {
1637                                        adev->ip_blocks[i].status.valid = false;
1638                                } else if (r) {
1639                                        DRM_ERROR("early_init of IP block <%s> failed %d\n",
1640                                                  adev->ip_blocks[i].version->funcs->name, r);
1641                                        return r;
1642                                } else {
1643                                        adev->ip_blocks[i].status.valid = true;
1644                                }
1645                        } else {
1646                                adev->ip_blocks[i].status.valid = true;
1647                        }
1648                }
1649                /* get the vbios after the asic_funcs are set up */
1650                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1651                        /* Read BIOS */
1652                        if (!amdgpu_get_bios(adev))
1653                                return -EINVAL;
1654
1655                        r = amdgpu_atombios_init(adev);
1656                        if (r) {
1657                                dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1658                                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1659                                return r;
1660                        }
1661                }
1662        }
1663
1664        adev->cg_flags &= amdgpu_cg_mask;
1665        adev->pg_flags &= amdgpu_pg_mask;
1666
1667        return 0;
1668}
1669
1670static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1671{
1672        int i, r;
1673
1674        for (i = 0; i < adev->num_ip_blocks; i++) {
1675                if (!adev->ip_blocks[i].status.sw)
1676                        continue;
1677                if (adev->ip_blocks[i].status.hw)
1678                        continue;
1679                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1680                    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1681                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1682                        r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1683                        if (r) {
1684                                DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1685                                          adev->ip_blocks[i].version->funcs->name, r);
1686                                return r;
1687                        }
1688                        adev->ip_blocks[i].status.hw = true;
1689                }
1690        }
1691
1692        return 0;
1693}
1694
1695static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1696{
1697        int i, r;
1698
1699        for (i = 0; i < adev->num_ip_blocks; i++) {
1700                if (!adev->ip_blocks[i].status.sw)
1701                        continue;
1702                if (adev->ip_blocks[i].status.hw)
1703                        continue;
1704                r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1705                if (r) {
1706                        DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1707                                  adev->ip_blocks[i].version->funcs->name, r);
1708                        return r;
1709                }
1710                adev->ip_blocks[i].status.hw = true;
1711        }
1712
1713        return 0;
1714}
1715
1716static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1717{
1718        int r = 0;
1719        int i;
1720        uint32_t smu_version;
1721
1722        if (adev->asic_type >= CHIP_VEGA10) {
1723                for (i = 0; i < adev->num_ip_blocks; i++) {
1724                        if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1725                                continue;
1726
1727                        /* no need to do the fw loading again if already done*/
1728                        if (adev->ip_blocks[i].status.hw == true)
1729                                break;
1730
1731                        if (adev->in_gpu_reset || adev->in_suspend) {
1732                                r = adev->ip_blocks[i].version->funcs->resume(adev);
1733                                if (r) {
1734                                        DRM_ERROR("resume of IP block <%s> failed %d\n",
1735                                                          adev->ip_blocks[i].version->funcs->name, r);
1736                                        return r;
1737                                }
1738                        } else {
1739                                r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1740                                if (r) {
1741                                        DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1742                                                          adev->ip_blocks[i].version->funcs->name, r);
1743                                        return r;
1744                                }
1745                        }
1746
1747                        adev->ip_blocks[i].status.hw = true;
1748                        break;
1749                }
1750        }
1751
1752        r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1753
1754        return r;
1755}
1756
1757/**
1758 * amdgpu_device_ip_init - run init for hardware IPs
1759 *
1760 * @adev: amdgpu_device pointer
1761 *
1762 * Main initialization pass for hardware IPs.  The list of all the hardware
1763 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1764 * are run.  sw_init initializes the software state associated with each IP
1765 * and hw_init initializes the hardware associated with each IP.
1766 * Returns 0 on success, negative error code on failure.
1767 */
1768static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1769{
1770        int i, r;
1771
1772        r = amdgpu_ras_init(adev);
1773        if (r)
1774                return r;
1775
1776        for (i = 0; i < adev->num_ip_blocks; i++) {
1777                if (!adev->ip_blocks[i].status.valid)
1778                        continue;
1779                r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1780                if (r) {
1781                        DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1782                                  adev->ip_blocks[i].version->funcs->name, r);
1783                        goto init_failed;
1784                }
1785                adev->ip_blocks[i].status.sw = true;
1786
1787                /* need to do gmc hw init early so we can allocate gpu mem */
1788                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1789                        r = amdgpu_device_vram_scratch_init(adev);
1790                        if (r) {
1791                                DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
1792                                goto init_failed;
1793                        }
1794                        r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1795                        if (r) {
1796                                DRM_ERROR("hw_init %d failed %d\n", i, r);
1797                                goto init_failed;
1798                        }
1799                        r = amdgpu_device_wb_init(adev);
1800                        if (r) {
1801                                DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
1802                                goto init_failed;
1803                        }
1804                        adev->ip_blocks[i].status.hw = true;
1805
1806                        /* right after GMC hw init, we create CSA */
1807                        if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1808                                r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1809                                                                AMDGPU_GEM_DOMAIN_VRAM,
1810                                                                AMDGPU_CSA_SIZE);
1811                                if (r) {
1812                                        DRM_ERROR("allocate CSA failed %d\n", r);
1813                                        goto init_failed;
1814                                }
1815                        }
1816                }
1817        }
1818
1819        r = amdgpu_ib_pool_init(adev);
1820        if (r) {
1821                dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1822                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1823                goto init_failed;
1824        }
1825
1826        r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1827        if (r)
1828                goto init_failed;
1829
1830        r = amdgpu_device_ip_hw_init_phase1(adev);
1831        if (r)
1832                goto init_failed;
1833
1834        r = amdgpu_device_fw_loading(adev);
1835        if (r)
1836                goto init_failed;
1837
1838        r = amdgpu_device_ip_hw_init_phase2(adev);
1839        if (r)
1840                goto init_failed;
1841
1842        if (adev->gmc.xgmi.num_physical_nodes > 1)
1843                amdgpu_xgmi_add_device(adev);
1844        amdgpu_amdkfd_device_init(adev);
1845
1846init_failed:
1847        if (amdgpu_sriov_vf(adev)) {
1848                if (!r)
1849                        amdgpu_virt_init_data_exchange(adev);
1850                amdgpu_virt_release_full_gpu(adev, true);
1851        }
1852
1853        return r;
1854}
1855
1856/**
1857 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1858 *
1859 * @adev: amdgpu_device pointer
1860 *
1861 * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
1862 * this function before a GPU reset.  If the value is retained after a
1863 * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
1864 */
1865static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
1866{
1867        memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1868}
1869
1870/**
1871 * amdgpu_device_check_vram_lost - check if vram is valid
1872 *
1873 * @adev: amdgpu_device pointer
1874 *
1875 * Checks the reset magic value written to the gart pointer in VRAM.
1876 * The driver calls this after a GPU reset to see if the contents of
1877 * VRAM is lost or now.
1878 * returns true if vram is lost, false if not.
1879 */
1880static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
1881{
1882        return !!memcmp(adev->gart.ptr, adev->reset_magic,
1883                        AMDGPU_RESET_MAGIC_NUM);
1884}
1885
1886/**
1887 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
1888 *
1889 * @adev: amdgpu_device pointer
1890 *
1891 * The list of all the hardware IPs that make up the asic is walked and the
1892 * set_clockgating_state callbacks are run.
1893 * Late initialization pass enabling clockgating for hardware IPs.
1894 * Fini or suspend, pass disabling clockgating for hardware IPs.
1895 * Returns 0 on success, negative error code on failure.
1896 */
1897
1898static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1899                                                enum amd_clockgating_state state)
1900{
1901        int i, j, r;
1902
1903        if (amdgpu_emu_mode == 1)
1904                return 0;
1905
1906        for (j = 0; j < adev->num_ip_blocks; j++) {
1907                i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
1908                if (!adev->ip_blocks[i].status.late_initialized)
1909                        continue;
1910                /* skip CG for VCE/UVD, it's handled specially */
1911                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1912                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1913                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1914                    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1915                        /* enable clockgating to save power */
1916                        r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1917                                                                                     state);
1918                        if (r) {
1919                                DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
1920                                          adev->ip_blocks[i].version->funcs->name, r);
1921                                return r;
1922                        }
1923                }
1924        }
1925
1926        return 0;
1927}
1928
1929static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
1930{
1931        int i, j, r;
1932
1933        if (amdgpu_emu_mode == 1)
1934                return 0;
1935
1936        for (j = 0; j < adev->num_ip_blocks; j++) {
1937                i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
1938                if (!adev->ip_blocks[i].status.late_initialized)
1939                        continue;
1940                /* skip CG for VCE/UVD, it's handled specially */
1941                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1942                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1943                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1944                    adev->ip_blocks[i].version->funcs->set_powergating_state) {
1945                        /* enable powergating to save power */
1946                        r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1947                                                                                        state);
1948                        if (r) {
1949                                DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1950                                          adev->ip_blocks[i].version->funcs->name, r);
1951                                return r;
1952                        }
1953                }
1954        }
1955        return 0;
1956}
1957
1958static int amdgpu_device_enable_mgpu_fan_boost(void)
1959{
1960        struct amdgpu_gpu_instance *gpu_ins;
1961        struct amdgpu_device *adev;
1962        int i, ret = 0;
1963
1964        mutex_lock(&mgpu_info.mutex);
1965
1966        /*
1967         * MGPU fan boost feature should be enabled
1968         * only when there are two or more dGPUs in
1969         * the system
1970         */
1971        if (mgpu_info.num_dgpu < 2)
1972                goto out;
1973
1974        for (i = 0; i < mgpu_info.num_dgpu; i++) {
1975                gpu_ins = &(mgpu_info.gpu_ins[i]);
1976                adev = gpu_ins->adev;
1977                if (!(adev->flags & AMD_IS_APU) &&
1978                    !gpu_ins->mgpu_fan_enabled &&
1979                    adev->powerplay.pp_funcs &&
1980                    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1981                        ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1982                        if (ret)
1983                                break;
1984
1985                        gpu_ins->mgpu_fan_enabled = 1;
1986                }
1987        }
1988
1989out:
1990        mutex_unlock(&mgpu_info.mutex);
1991
1992        return ret;
1993}
1994
1995/**
1996 * amdgpu_device_ip_late_init - run late init for hardware IPs
1997 *
1998 * @adev: amdgpu_device pointer
1999 *
2000 * Late initialization pass for hardware IPs.  The list of all the hardware
2001 * IPs that make up the asic is walked and the late_init callbacks are run.
2002 * late_init covers any special initialization that an IP requires
2003 * after all of the have been initialized or something that needs to happen
2004 * late in the init process.
2005 * Returns 0 on success, negative error code on failure.
2006 */
2007static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2008{
2009        int i = 0, r;
2010
2011        for (i = 0; i < adev->num_ip_blocks; i++) {
2012                if (!adev->ip_blocks[i].status.hw)
2013                        continue;
2014                if (adev->ip_blocks[i].version->funcs->late_init) {
2015                        r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2016                        if (r) {
2017                                DRM_ERROR("late_init of IP block <%s> failed %d\n",
2018                                          adev->ip_blocks[i].version->funcs->name, r);
2019                                return r;
2020                        }
2021                }
2022                adev->ip_blocks[i].status.late_initialized = true;
2023        }
2024
2025        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2026        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2027
2028        amdgpu_device_fill_reset_magic(adev);
2029
2030        r = amdgpu_device_enable_mgpu_fan_boost();
2031        if (r)
2032                DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2033
2034        /* set to low pstate by default */
2035        amdgpu_xgmi_set_pstate(adev, 0);
2036
2037        return 0;
2038}
2039
2040/**
2041 * amdgpu_device_ip_fini - run fini for hardware IPs
2042 *
2043 * @adev: amdgpu_device pointer
2044 *
2045 * Main teardown pass for hardware IPs.  The list of all the hardware
2046 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2047 * are run.  hw_fini tears down the hardware associated with each IP
2048 * and sw_fini tears down any software state associated with each IP.
2049 * Returns 0 on success, negative error code on failure.
2050 */
2051static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2052{
2053        int i, r;
2054
2055        amdgpu_ras_pre_fini(adev);
2056
2057        if (adev->gmc.xgmi.num_physical_nodes > 1)
2058                amdgpu_xgmi_remove_device(adev);
2059
2060        amdgpu_amdkfd_device_fini(adev);
2061
2062        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2063        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2064
2065        /* need to disable SMC first */
2066        for (i = 0; i < adev->num_ip_blocks; i++) {
2067                if (!adev->ip_blocks[i].status.hw)
2068                        continue;
2069                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2070                        r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2071                        /* XXX handle errors */
2072                        if (r) {
2073                                DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2074                                          adev->ip_blocks[i].version->funcs->name, r);
2075                        }
2076                        adev->ip_blocks[i].status.hw = false;
2077                        break;
2078                }
2079        }
2080
2081        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2082                if (!adev->ip_blocks[i].status.hw)
2083                        continue;
2084
2085                r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2086                /* XXX handle errors */
2087                if (r) {
2088                        DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2089                                  adev->ip_blocks[i].version->funcs->name, r);
2090                }
2091
2092                adev->ip_blocks[i].status.hw = false;
2093        }
2094
2095
2096        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2097                if (!adev->ip_blocks[i].status.sw)
2098                        continue;
2099
2100                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2101                        amdgpu_ucode_free_bo(adev);
2102                        amdgpu_free_static_csa(&adev->virt.csa_obj);
2103                        amdgpu_device_wb_fini(adev);
2104                        amdgpu_device_vram_scratch_fini(adev);
2105                        amdgpu_ib_pool_fini(adev);
2106                }
2107
2108                r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2109                /* XXX handle errors */
2110                if (r) {
2111                        DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2112                                  adev->ip_blocks[i].version->funcs->name, r);
2113                }
2114                adev->ip_blocks[i].status.sw = false;
2115                adev->ip_blocks[i].status.valid = false;
2116        }
2117
2118        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2119                if (!adev->ip_blocks[i].status.late_initialized)
2120                        continue;
2121                if (adev->ip_blocks[i].version->funcs->late_fini)
2122                        adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2123                adev->ip_blocks[i].status.late_initialized = false;
2124        }
2125
2126        amdgpu_ras_fini(adev);
2127
2128        if (amdgpu_sriov_vf(adev))
2129                if (amdgpu_virt_release_full_gpu(adev, false))
2130                        DRM_ERROR("failed to release exclusive mode on fini\n");
2131
2132        return 0;
2133}
2134
2135/**
2136 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2137 *
2138 * @work: work_struct.
2139 */
2140static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2141{
2142        struct amdgpu_device *adev =
2143                container_of(work, struct amdgpu_device, delayed_init_work.work);
2144        int r;
2145
2146        r = amdgpu_ib_ring_tests(adev);
2147        if (r)
2148                DRM_ERROR("ib ring test failed (%d).\n", r);
2149}
2150
2151static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2152{
2153        struct amdgpu_device *adev =
2154                container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2155
2156        mutex_lock(&adev->gfx.gfx_off_mutex);
2157        if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2158                if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2159                        adev->gfx.gfx_off_state = true;
2160        }
2161        mutex_unlock(&adev->gfx.gfx_off_mutex);
2162}
2163
2164/**
2165 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2166 *
2167 * @adev: amdgpu_device pointer
2168 *
2169 * Main suspend function for hardware IPs.  The list of all the hardware
2170 * IPs that make up the asic is walked, clockgating is disabled and the
2171 * suspend callbacks are run.  suspend puts the hardware and software state
2172 * in each IP into a state suitable for suspend.
2173 * Returns 0 on success, negative error code on failure.
2174 */
2175static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2176{
2177        int i, r;
2178
2179        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2180        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2181
2182        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2183                if (!adev->ip_blocks[i].status.valid)
2184                        continue;
2185                /* displays are handled separately */
2186                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
2187                        /* XXX handle errors */
2188                        r = adev->ip_blocks[i].version->funcs->suspend(adev);
2189                        /* XXX handle errors */
2190                        if (r) {
2191                                DRM_ERROR("suspend of IP block <%s> failed %d\n",
2192                                          adev->ip_blocks[i].version->funcs->name, r);
2193                                return r;
2194                        }
2195                        adev->ip_blocks[i].status.hw = false;
2196                }
2197        }
2198
2199        return 0;
2200}
2201
2202/**
2203 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2204 *
2205 * @adev: amdgpu_device pointer
2206 *
2207 * Main suspend function for hardware IPs.  The list of all the hardware
2208 * IPs that make up the asic is walked, clockgating is disabled and the
2209 * suspend callbacks are run.  suspend puts the hardware and software state
2210 * in each IP into a state suitable for suspend.
2211 * Returns 0 on success, negative error code on failure.
2212 */
2213static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2214{
2215        int i, r;
2216
2217        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2218                if (!adev->ip_blocks[i].status.valid)
2219                        continue;
2220                /* displays are handled in phase1 */
2221                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2222                        continue;
2223                /* XXX handle errors */
2224                r = adev->ip_blocks[i].version->funcs->suspend(adev);
2225                /* XXX handle errors */
2226                if (r) {
2227                        DRM_ERROR("suspend of IP block <%s> failed %d\n",
2228                                  adev->ip_blocks[i].version->funcs->name, r);
2229                }
2230                adev->ip_blocks[i].status.hw = false;
2231                /* handle putting the SMC in the appropriate state */
2232                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2233                        if (is_support_sw_smu(adev)) {
2234                                /* todo */
2235                        } else if (adev->powerplay.pp_funcs &&
2236                                           adev->powerplay.pp_funcs->set_mp1_state) {
2237                                r = adev->powerplay.pp_funcs->set_mp1_state(
2238                                        adev->powerplay.pp_handle,
2239                                        adev->mp1_state);
2240                                if (r) {
2241                                        DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2242                                                  adev->mp1_state, r);
2243                                        return r;
2244                                }
2245                        }
2246                }
2247
2248                adev->ip_blocks[i].status.hw = false;
2249        }
2250
2251        return 0;
2252}
2253
/**
 * amdgpu_device_ip_suspend - run suspend for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs.  Runs suspend phase 1 (gating
 * disabled, display blocks suspended) followed by suspend phase 2 (all
 * remaining blocks).  Phase 2 is skipped when phase 1 fails.  On an SR-IOV
 * virtual function, full GPU access is requested before and released after
 * the two phases; the release happens on the error paths as well so that
 * exclusive access is never left held.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_request_full_gpu(adev, false);

	r = amdgpu_device_ip_suspend_phase1(adev);
	if (!r)
		r = amdgpu_device_ip_suspend_phase2(adev);

	/* always pair the request above with a release, even on failure */
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, false);

	return r;
}
2282
2283static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2284{
2285        int i, r;
2286
2287        static enum amd_ip_block_type ip_order[] = {
2288                AMD_IP_BLOCK_TYPE_GMC,
2289                AMD_IP_BLOCK_TYPE_COMMON,
2290                AMD_IP_BLOCK_TYPE_PSP,
2291                AMD_IP_BLOCK_TYPE_IH,
2292        };
2293
2294        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2295                int j;
2296                struct amdgpu_ip_block *block;
2297
2298                for (j = 0; j < adev->num_ip_blocks; j++) {
2299                        block = &adev->ip_blocks[j];
2300
2301                        block->status.hw = false;
2302                        if (block->version->type != ip_order[i] ||
2303                                !block->status.valid)
2304                                continue;
2305
2306                        r = block->version->funcs->hw_init(adev);
2307                        DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2308                        if (r)
2309                                return r;
2310                        block->status.hw = true;
2311                }
2312        }
2313
2314        return 0;
2315}
2316
2317static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2318{
2319        int i, r;
2320
2321        static enum amd_ip_block_type ip_order[] = {
2322                AMD_IP_BLOCK_TYPE_SMC,
2323                AMD_IP_BLOCK_TYPE_DCE,
2324                AMD_IP_BLOCK_TYPE_GFX,
2325                AMD_IP_BLOCK_TYPE_SDMA,
2326                AMD_IP_BLOCK_TYPE_UVD,
2327                AMD_IP_BLOCK_TYPE_VCE
2328        };
2329
2330        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2331                int j;
2332                struct amdgpu_ip_block *block;
2333
2334                for (j = 0; j < adev->num_ip_blocks; j++) {
2335                        block = &adev->ip_blocks[j];
2336
2337                        if (block->version->type != ip_order[i] ||
2338                                !block->status.valid ||
2339                                block->status.hw)
2340                                continue;
2341
2342                        r = block->version->funcs->hw_init(adev);
2343                        DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2344                        if (r)
2345                                return r;
2346                        block->status.hw = true;
2347                }
2348        }
2349
2350        return 0;
2351}
2352
2353/**
2354 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2355 *
2356 * @adev: amdgpu_device pointer
2357 *
2358 * First resume function for hardware IPs.  The list of all the hardware
2359 * IPs that make up the asic is walked and the resume callbacks are run for
2360 * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2361 * after a suspend and updates the software state as necessary.  This
2362 * function is also used for restoring the GPU after a GPU reset.
2363 * Returns 0 on success, negative error code on failure.
2364 */
2365static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2366{
2367        int i, r;
2368
2369        for (i = 0; i < adev->num_ip_blocks; i++) {
2370                if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2371                        continue;
2372                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2373                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2374                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2375
2376                        r = adev->ip_blocks[i].version->funcs->resume(adev);
2377                        if (r) {
2378                                DRM_ERROR("resume of IP block <%s> failed %d\n",
2379                                          adev->ip_blocks[i].version->funcs->name, r);
2380                                return r;
2381                        }
2382                        adev->ip_blocks[i].status.hw = true;
2383                }
2384        }
2385
2386        return 0;
2387}
2388
2389/**
2390 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2391 *
2392 * @adev: amdgpu_device pointer
2393 *
2394 * First resume function for hardware IPs.  The list of all the hardware
2395 * IPs that make up the asic is walked and the resume callbacks are run for
2396 * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
2397 * functional state after a suspend and updates the software state as
2398 * necessary.  This function is also used for restoring the GPU after a GPU
2399 * reset.
2400 * Returns 0 on success, negative error code on failure.
2401 */
2402static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2403{
2404        int i, r;
2405
2406        for (i = 0; i < adev->num_ip_blocks; i++) {
2407                if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2408                        continue;
2409                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2410                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2411                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2412                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2413                        continue;
2414                r = adev->ip_blocks[i].version->funcs->resume(adev);
2415                if (r) {
2416                        DRM_ERROR("resume of IP block <%s> failed %d\n",
2417                                  adev->ip_blocks[i].version->funcs->name, r);
2418                        return r;
2419                }
2420                adev->ip_blocks[i].status.hw = true;
2421        }
2422
2423        return 0;
2424}
2425
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  The hardware IPs are split into
 * two resume functions because they are also used in recovering from a
 * GPU reset, where some additional steps need to be taken between them.
 * In this case (S3/S4) they are run sequentially, with firmware loading
 * in between.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int err;

	err = amdgpu_device_ip_resume_phase1(adev);
	if (err)
		return err;

	err = amdgpu_device_fw_loading(adev);
	if (err)
		return err;

	return amdgpu_device_ip_resume_phase2(adev);
}
2454
2455/**
2456 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2457 *
2458 * @adev: amdgpu_device pointer
2459 *
2460 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2461 */
2462static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2463{
2464        if (amdgpu_sriov_vf(adev)) {
2465                if (adev->is_atom_fw) {
2466                        if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2467                                adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2468                } else {
2469                        if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2470                                adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2471                }
2472
2473                if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2474                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2475        }
2476}
2477
2478/**
2479 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2480 *
2481 * @asic_type: AMD asic type
2482 *
2483 * Check if there is DC (new modesetting infrastructre) support for an asic.
2484 * returns true if DC has support, false if not.
2485 */
2486bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2487{
2488        switch (asic_type) {
2489#if defined(CONFIG_DRM_AMD_DC)
2490        case CHIP_BONAIRE:
2491        case CHIP_KAVERI:
2492        case CHIP_KABINI:
2493        case CHIP_MULLINS:
2494                /*
2495                 * We have systems in the wild with these ASICs that require
2496                 * LVDS and VGA support which is not supported with DC.
2497                 *
2498                 * Fallback to the non-DC driver here by default so as not to
2499                 * cause regressions.
2500                 */
2501                return amdgpu_dc > 0;
2502        case CHIP_HAWAII:
2503        case CHIP_CARRIZO:
2504        case CHIP_STONEY:
2505        case CHIP_POLARIS10:
2506        case CHIP_POLARIS11:
2507        case CHIP_POLARIS12:
2508        case CHIP_VEGAM:
2509        case CHIP_TONGA:
2510        case CHIP_FIJI:
2511        case CHIP_VEGA10:
2512        case CHIP_VEGA12:
2513        case CHIP_VEGA20:
2514#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
2515        case CHIP_RAVEN:
2516#endif
2517#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2518        case CHIP_NAVI10:
2519        case CHIP_NAVI14:
2520        case CHIP_NAVI12:
2521#endif
2522#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
2523        case CHIP_RENOIR:
2524#endif
2525                return amdgpu_dc != 0;
2526#endif
2527        default:
2528                return false;
2529        }
2530}
2531
2532/**
2533 * amdgpu_device_has_dc_support - check if dc is supported
2534 *
2535 * @adev: amdgpu_device_pointer
2536 *
2537 * Returns true for supported, false for not supported
2538 */
2539bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2540{
2541        if (amdgpu_sriov_vf(adev))
2542                return false;
2543
2544        return amdgpu_device_asic_has_dc_support(adev->asic_type);
2545}
2546
2547
2548static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2549{
2550        struct amdgpu_device *adev =
2551                container_of(__work, struct amdgpu_device, xgmi_reset_work);
2552
2553        adev->asic_reset_res =  amdgpu_asic_reset(adev);
2554        if (adev->asic_reset_res)
2555                DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2556                         adev->asic_reset_res, adev->ddev->unique);
2557}
2558
2559
2560/**
2561 * amdgpu_device_init - initialize the driver
2562 *
2563 * @adev: amdgpu_device pointer
2564 * @ddev: drm dev pointer
2565 * @pdev: pci dev pointer
2566 * @flags: driver flags
2567 *
2568 * Initializes the driver info and hw (all asics).
2569 * Returns 0 for success or an error on failure.
2570 * Called at driver startup.
2571 */
2572int amdgpu_device_init(struct amdgpu_device *adev,
2573                       struct drm_device *ddev,
2574                       struct pci_dev *pdev,
2575                       uint32_t flags)
2576{
2577        int r, i;
2578        bool runtime = false;
2579        u32 max_MBps;
2580
2581        adev->shutdown = false;
2582        adev->dev = &pdev->dev;
2583        adev->ddev = ddev;
2584        adev->pdev = pdev;
2585        adev->flags = flags;
2586        adev->asic_type = flags & AMD_ASIC_MASK;
2587        adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2588        if (amdgpu_emu_mode == 1)
2589                adev->usec_timeout *= 2;
2590        adev->gmc.gart_size = 512 * 1024 * 1024;
2591        adev->accel_working = false;
2592        adev->num_rings = 0;
2593        adev->mman.buffer_funcs = NULL;
2594        adev->mman.buffer_funcs_ring = NULL;
2595        adev->vm_manager.vm_pte_funcs = NULL;
2596        adev->vm_manager.vm_pte_num_rqs = 0;
2597        adev->gmc.gmc_funcs = NULL;
2598        adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2599        bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2600
2601        adev->smc_rreg = &amdgpu_invalid_rreg;
2602        adev->smc_wreg = &amdgpu_invalid_wreg;
2603        adev->pcie_rreg = &amdgpu_invalid_rreg;
2604        adev->pcie_wreg = &amdgpu_invalid_wreg;
2605        adev->pciep_rreg = &amdgpu_invalid_rreg;
2606        adev->pciep_wreg = &amdgpu_invalid_wreg;
2607        adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2608        adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
2609        adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2610        adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2611        adev->didt_rreg = &amdgpu_invalid_rreg;
2612        adev->didt_wreg = &amdgpu_invalid_wreg;
2613        adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2614        adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2615        adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2616        adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2617
2618        DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2619                 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2620                 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2621
2622        /* mutex initialization are all done here so we
2623         * can recall function without having locking issues */
2624        atomic_set(&adev->irq.ih.lock, 0);
2625        mutex_init(&adev->firmware.mutex);
2626        mutex_init(&adev->pm.mutex);
2627        mutex_init(&adev->gfx.gpu_clock_mutex);
2628        mutex_init(&adev->srbm_mutex);
2629        mutex_init(&adev->gfx.pipe_reserve_mutex);
2630        mutex_init(&adev->gfx.gfx_off_mutex);
2631        mutex_init(&adev->grbm_idx_mutex);
2632        mutex_init(&adev->mn_lock);
2633        mutex_init(&adev->virt.vf_errors.lock);
2634        hash_init(adev->mn_hash);
2635        mutex_init(&adev->lock_reset);
2636        mutex_init(&adev->virt.dpm_mutex);
2637        mutex_init(&adev->psp.mutex);
2638
2639        r = amdgpu_device_check_arguments(adev);
2640        if (r)
2641                return r;
2642
2643        spin_lock_init(&adev->mmio_idx_lock);
2644        spin_lock_init(&adev->smc_idx_lock);
2645        spin_lock_init(&adev->pcie_idx_lock);
2646        spin_lock_init(&adev->uvd_ctx_idx_lock);
2647        spin_lock_init(&adev->didt_idx_lock);
2648        spin_lock_init(&adev->gc_cac_idx_lock);
2649        spin_lock_init(&adev->se_cac_idx_lock);
2650        spin_lock_init(&adev->audio_endpt_idx_lock);
2651        spin_lock_init(&adev->mm_stats.lock);
2652
2653        INIT_LIST_HEAD(&adev->shadow_list);
2654        mutex_init(&adev->shadow_list_lock);
2655
2656        INIT_LIST_HEAD(&adev->ring_lru_list);
2657        spin_lock_init(&adev->ring_lru_list_lock);
2658
2659        INIT_DELAYED_WORK(&adev->delayed_init_work,
2660                          amdgpu_device_delayed_init_work_handler);
2661        INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2662                          amdgpu_device_delay_enable_gfx_off);
2663
2664        INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2665
2666        adev->gfx.gfx_off_req_count = 1;
2667        adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2668
2669        /* Registers mapping */
2670        /* TODO: block userspace mapping of io register */
2671        if (adev->asic_type >= CHIP_BONAIRE) {
2672                adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2673                adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2674        } else {
2675                adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2676                adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2677        }
2678
2679        adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2680        if (adev->rmmio == NULL) {
2681                return -ENOMEM;
2682        }
2683        DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2684        DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2685
2686        /* io port mapping */
2687        for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2688                if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2689                        adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2690                        adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2691                        break;
2692                }
2693        }
2694        if (adev->rio_mem == NULL)
2695                DRM_INFO("PCI I/O BAR is not found.\n");
2696
2697        /* enable PCIE atomic ops */
2698        r = pci_enable_atomic_ops_to_root(adev->pdev,
2699                                          PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2700                                          PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2701        if (r) {
2702                adev->have_atomics_support = false;
2703                DRM_INFO("PCIE atomic ops is not supported\n");
2704        } else {
2705                adev->have_atomics_support = true;
2706        }
2707
2708        amdgpu_device_get_pcie_info(adev);
2709
2710        if (amdgpu_mcbp)
2711                DRM_INFO("MCBP is enabled\n");
2712
2713        if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2714                adev->enable_mes = true;
2715
2716        if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
2717                r = amdgpu_discovery_init(adev);
2718                if (r) {
2719                        dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2720                        return r;
2721                }
2722        }
2723
2724        /* early init functions */
2725        r = amdgpu_device_ip_early_init(adev);
2726        if (r)
2727                return r;
2728
2729        /* doorbell bar mapping and doorbell index init*/
2730        amdgpu_device_doorbell_init(adev);
2731
2732        /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2733        /* this will fail for cards that aren't VGA class devices, just
2734         * ignore it */
2735        vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
2736
2737        if (amdgpu_device_is_px(ddev))
2738                runtime = true;
2739        if (!pci_is_thunderbolt_attached(adev->pdev))
2740                vga_switcheroo_register_client(adev->pdev,
2741                                               &amdgpu_switcheroo_ops, runtime);
2742        if (runtime)
2743                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2744
2745        if (amdgpu_emu_mode == 1) {
2746                /* post the asic on emulation mode */
2747                emu_soc_asic_init(adev);
2748                goto fence_driver_init;
2749        }
2750
2751        /* detect if we are with an SRIOV vbios */
2752        amdgpu_device_detect_sriov_bios(adev);
2753
2754        /* check if we need to reset the asic
2755         *  E.g., driver was not cleanly unloaded previously, etc.
2756         */
2757        if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
2758                r = amdgpu_asic_reset(adev);
2759                if (r) {
2760                        dev_err(adev->dev, "asic reset on init failed\n");
2761                        goto failed;
2762                }
2763        }
2764
2765        /* Post card if necessary */
2766        if (amdgpu_device_need_post(adev)) {
2767                if (!adev->bios) {
2768                        dev_err(adev->dev, "no vBIOS found\n");
2769                        r = -EINVAL;
2770                        goto failed;
2771                }
2772                DRM_INFO("GPU posting now...\n");
2773                r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2774                if (r) {
2775                        dev_err(adev->dev, "gpu post error!\n");
2776                        goto failed;
2777                }
2778        }
2779
2780        if (adev->is_atom_fw) {
2781                /* Initialize clocks */
2782                r = amdgpu_atomfirmware_get_clock_info(adev);
2783                if (r) {
2784                        dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
2785                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2786                        goto failed;
2787                }
2788        } else {
2789                /* Initialize clocks */
2790                r = amdgpu_atombios_get_clock_info(adev);
2791                if (r) {
2792                        dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
2793                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2794                        goto failed;
2795                }
2796                /* init i2c buses */
2797                if (!amdgpu_device_has_dc_support(adev))
2798                        amdgpu_atombios_i2c_init(adev);
2799        }
2800
2801fence_driver_init:
2802        /* Fence driver */
2803        r = amdgpu_fence_driver_init(adev);
2804        if (r) {
2805                dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
2806                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
2807                goto failed;
2808        }
2809
2810        /* init the mode config */
2811        drm_mode_config_init(adev->ddev);
2812
2813        r = amdgpu_device_ip_init(adev);
2814        if (r) {
2815                /* failed in exclusive mode due to timeout */
2816                if (amdgpu_sriov_vf(adev) &&
2817                    !amdgpu_sriov_runtime(adev) &&
2818                    amdgpu_virt_mmio_blocked(adev) &&
2819                    !amdgpu_virt_wait_reset(adev)) {
2820                        dev_err(adev->dev, "VF exclusive mode timeout\n");
2821                        /* Don't send request since VF is inactive. */
2822                        adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2823                        adev->virt.ops = NULL;
2824                        r = -EAGAIN;
2825                        goto failed;
2826                }
2827                dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
2828                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
2829                if (amdgpu_virt_request_full_gpu(adev, false))
2830                        amdgpu_virt_release_full_gpu(adev, false);
2831                goto failed;
2832        }
2833
2834        adev->accel_working = true;
2835
2836        amdgpu_vm_check_compute_bug(adev);
2837
2838        /* Initialize the buffer migration limit. */
2839        if (amdgpu_moverate >= 0)
2840                max_MBps = amdgpu_moverate;
2841        else
2842                max_MBps = 8; /* Allow 8 MB/s. */
2843        /* Get a log2 for easy divisions. */
2844        adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2845
2846        amdgpu_fbdev_init(adev);
2847
2848        if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2849                amdgpu_pm_virt_sysfs_init(adev);
2850
2851        r = amdgpu_pm_sysfs_init(adev);
2852        if (r)
2853                DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2854
2855        r = amdgpu_ucode_sysfs_init(adev);
2856        if (r)
2857                DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2858
2859        r = amdgpu_debugfs_gem_init(adev);
2860        if (r)
2861                DRM_ERROR("registering gem debugfs failed (%d).\n", r);
2862
2863        r = amdgpu_debugfs_regs_init(adev);
2864        if (r)
2865                DRM_ERROR("registering register debugfs failed (%d).\n", r);
2866
2867        r = amdgpu_debugfs_firmware_init(adev);
2868        if (r)
2869                DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
2870
2871        r = amdgpu_debugfs_init(adev);
2872        if (r)
2873                DRM_ERROR("Creating debugfs files failed (%d).\n", r);
2874
2875        if ((amdgpu_testing & 1)) {
2876                if (adev->accel_working)
2877                        amdgpu_test_moves(adev);
2878                else
2879                        DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2880        }
2881        if (amdgpu_benchmarking) {
2882                if (adev->accel_working)
2883                        amdgpu_benchmark(adev, amdgpu_benchmarking);
2884                else
2885                        DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2886        }
2887
2888        /*
2889         * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
2890         * Otherwise the mgpu fan boost feature will be skipped due to the
2891         * gpu instance is counted less.
2892         */
2893        amdgpu_register_gpu_instance(adev);
2894
2895        /* enable clockgating, etc. after ib tests, etc. since some blocks require
2896         * explicit gating rather than handling it automatically.
2897         */
2898        r = amdgpu_device_ip_late_init(adev);
2899        if (r) {
2900                dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
2901                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
2902                goto failed;
2903        }
2904
2905        /* must succeed. */
2906        amdgpu_ras_resume(adev);
2907
2908        queue_delayed_work(system_wq, &adev->delayed_init_work,
2909                           msecs_to_jiffies(AMDGPU_RESUME_MS));
2910
2911        r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2912        if (r) {
2913                dev_err(adev->dev, "Could not create pcie_replay_count");
2914                return r;
2915        }
2916
2917        if (IS_ENABLED(CONFIG_PERF_EVENTS))
2918                r = amdgpu_pmu_init(adev);
2919        if (r)
2920                dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2921
2922        return 0;
2923
2924failed:
2925        amdgpu_vf_error_trans_all(adev);
2926        if (runtime)
2927                vga_switcheroo_fini_domain_pm_ops(adev->dev);
2928
2929        return r;
2930}
2931
2932/**
2933 * amdgpu_device_fini - tear down the driver
2934 *
2935 * @adev: amdgpu_device pointer
2936 *
2937 * Tear down the driver info (all asics).
2938 * Called at driver shutdown.
2939 */
2940void amdgpu_device_fini(struct amdgpu_device *adev)
2941{
2942        int r;
2943
2944        DRM_INFO("amdgpu: finishing device.\n");
2945        adev->shutdown = true;
2946        /* disable all interrupts */
2947        amdgpu_irq_disable_all(adev);
2948        if (adev->mode_info.mode_config_initialized){
2949                if (!amdgpu_device_has_dc_support(adev))
2950                        drm_helper_force_disable_all(adev->ddev);
2951                else
2952                        drm_atomic_helper_shutdown(adev->ddev);
2953        }
2954        amdgpu_fence_driver_fini(adev);
2955        amdgpu_pm_sysfs_fini(adev);
2956        amdgpu_fbdev_fini(adev);
2957        r = amdgpu_device_ip_fini(adev);
2958        if (adev->firmware.gpu_info_fw) {
2959                release_firmware(adev->firmware.gpu_info_fw);
2960                adev->firmware.gpu_info_fw = NULL;
2961        }
2962        adev->accel_working = false;
2963        cancel_delayed_work_sync(&adev->delayed_init_work);
2964        /* free i2c buses */
2965        if (!amdgpu_device_has_dc_support(adev))
2966                amdgpu_i2c_fini(adev);
2967
2968        if (amdgpu_emu_mode != 1)
2969                amdgpu_atombios_fini(adev);
2970
2971        kfree(adev->bios);
2972        adev->bios = NULL;
2973        if (!pci_is_thunderbolt_attached(adev->pdev))
2974                vga_switcheroo_unregister_client(adev->pdev);
2975        if (adev->flags & AMD_IS_PX)
2976                vga_switcheroo_fini_domain_pm_ops(adev->dev);
2977        vga_client_register(adev->pdev, NULL, NULL, NULL);
2978        if (adev->rio_mem)
2979                pci_iounmap(adev->pdev, adev->rio_mem);
2980        adev->rio_mem = NULL;
2981        iounmap(adev->rmmio);
2982        adev->rmmio = NULL;
2983        amdgpu_device_doorbell_fini(adev);
2984        if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2985                amdgpu_pm_virt_sysfs_fini(adev);
2986
2987        amdgpu_debugfs_regs_cleanup(adev);
2988        device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
2989        amdgpu_ucode_sysfs_fini(adev);
2990        if (IS_ENABLED(CONFIG_PERF_EVENTS))
2991                amdgpu_pmu_fini(adev);
2992        amdgpu_debugfs_preempt_cleanup(adev);
2993        if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
2994                amdgpu_discovery_fini(adev);
2995}
2996
2997
2998/*
2999 * Suspend & resume.
3000 */
3001/**
3002 * amdgpu_device_suspend - initiate device suspend
3003 *
3004 * @dev: drm dev pointer
3005 * @suspend: suspend state
3006 * @fbcon : notify the fbdev of suspend
3007 *
3008 * Puts the hw in the suspend state (all asics).
3009 * Returns 0 for success or an error on failure.
3010 * Called at driver suspend.
3011 */
3012int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
3013{
3014        struct amdgpu_device *adev;
3015        struct drm_crtc *crtc;
3016        struct drm_connector *connector;
3017        int r;
3018
3019        if (dev == NULL || dev->dev_private == NULL) {
3020                return -ENODEV;
3021        }
3022
3023        adev = dev->dev_private;
3024
3025        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3026                return 0;
3027
3028        adev->in_suspend = true;
3029        drm_kms_helper_poll_disable(dev);
3030
3031        if (fbcon)
3032                amdgpu_fbdev_set_suspend(adev, 1);
3033
3034        cancel_delayed_work_sync(&adev->delayed_init_work);
3035
3036        if (!amdgpu_device_has_dc_support(adev)) {
3037                /* turn off display hw */
3038                drm_modeset_lock_all(dev);
3039                list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3040                        drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
3041                }
3042                drm_modeset_unlock_all(dev);
3043                        /* unpin the front buffers and cursors */
3044                list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3045                        struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3046                        struct drm_framebuffer *fb = crtc->primary->fb;
3047                        struct amdgpu_bo *robj;
3048
3049                        if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3050                                struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3051                                r = amdgpu_bo_reserve(aobj, true);
3052                                if (r == 0) {
3053                                        amdgpu_bo_unpin(aobj);
3054                                        amdgpu_bo_unreserve(aobj);
3055                                }
3056                        }
3057
3058                        if (fb == NULL || fb->obj[0] == NULL) {
3059                                continue;
3060                        }
3061                        robj = gem_to_amdgpu_bo(fb->obj[0]);
3062                        /* don't unpin kernel fb objects */
3063                        if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3064                                r = amdgpu_bo_reserve(robj, true);
3065                                if (r == 0) {
3066                                        amdgpu_bo_unpin(robj);
3067                                        amdgpu_bo_unreserve(robj);
3068                                }
3069                        }
3070                }
3071        }
3072
3073        amdgpu_amdkfd_suspend(adev);
3074
3075        amdgpu_ras_suspend(adev);
3076
3077        r = amdgpu_device_ip_suspend_phase1(adev);
3078
3079        /* evict vram memory */
3080        amdgpu_bo_evict_vram(adev);
3081
3082        amdgpu_fence_driver_suspend(adev);
3083
3084        r = amdgpu_device_ip_suspend_phase2(adev);
3085
3086        /* evict remaining vram memory
3087         * This second call to evict vram is to evict the gart page table
3088         * using the CPU.
3089         */
3090        amdgpu_bo_evict_vram(adev);
3091
3092        pci_save_state(dev->pdev);
3093        if (suspend) {
3094                /* Shut down the device */
3095                pci_disable_device(dev->pdev);
3096                pci_set_power_state(dev->pdev, PCI_D3hot);
3097        } else {
3098                r = amdgpu_asic_reset(adev);
3099                if (r)
3100                        DRM_ERROR("amdgpu asic reset failed\n");
3101        }
3102
3103        return 0;
3104}
3105
3106/**
3107 * amdgpu_device_resume - initiate device resume
3108 *
3109 * @dev: drm dev pointer
3110 * @resume: resume state
3111 * @fbcon : notify the fbdev of resume
3112 *
3113 * Bring the hw back to operating state (all asics).
3114 * Returns 0 for success or an error on failure.
3115 * Called at driver resume.
3116 */
3117int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
3118{
3119        struct drm_connector *connector;
3120        struct amdgpu_device *adev = dev->dev_private;
3121        struct drm_crtc *crtc;
3122        int r = 0;
3123
3124        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3125                return 0;
3126
3127        if (resume) {
3128                pci_set_power_state(dev->pdev, PCI_D0);
3129                pci_restore_state(dev->pdev);
3130                r = pci_enable_device(dev->pdev);
3131                if (r)
3132                        return r;
3133        }
3134
3135        /* post card */
3136        if (amdgpu_device_need_post(adev)) {
3137                r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3138                if (r)
3139                        DRM_ERROR("amdgpu asic init failed\n");
3140        }
3141
3142        r = amdgpu_device_ip_resume(adev);
3143        if (r) {
3144                DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
3145                return r;
3146        }
3147        amdgpu_fence_driver_resume(adev);
3148
3149
3150        r = amdgpu_device_ip_late_init(adev);
3151        if (r)
3152                return r;
3153
3154        queue_delayed_work(system_wq, &adev->delayed_init_work,
3155                           msecs_to_jiffies(AMDGPU_RESUME_MS));
3156
3157        if (!amdgpu_device_has_dc_support(adev)) {
3158                /* pin cursors */
3159                list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3160                        struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3161
3162                        if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3163                                struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3164                                r = amdgpu_bo_reserve(aobj, true);
3165                                if (r == 0) {
3166                                        r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3167                                        if (r != 0)
3168                                                DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3169                                        amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3170                                        amdgpu_bo_unreserve(aobj);
3171                                }
3172                        }
3173                }
3174        }
3175        r = amdgpu_amdkfd_resume(adev);
3176        if (r)
3177                return r;
3178
3179        /* Make sure IB tests flushed */
3180        flush_delayed_work(&adev->delayed_init_work);
3181
3182        /* blat the mode back in */
3183        if (fbcon) {
3184                if (!amdgpu_device_has_dc_support(adev)) {
3185                        /* pre DCE11 */
3186                        drm_helper_resume_force_mode(dev);
3187
3188                        /* turn on display hw */
3189                        drm_modeset_lock_all(dev);
3190                        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3191                                drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3192                        }
3193                        drm_modeset_unlock_all(dev);
3194                }
3195                amdgpu_fbdev_set_suspend(adev, 0);
3196        }
3197
3198        drm_kms_helper_poll_enable(dev);
3199
3200        amdgpu_ras_resume(adev);
3201
3202        /*
3203         * Most of the connector probing functions try to acquire runtime pm
3204         * refs to ensure that the GPU is powered on when connector polling is
3205         * performed. Since we're calling this from a runtime PM callback,
3206         * trying to acquire rpm refs will cause us to deadlock.
3207         *
3208         * Since we're guaranteed to be holding the rpm lock, it's safe to
3209         * temporarily disable the rpm helpers so this doesn't deadlock us.
3210         */
3211#ifdef CONFIG_PM
3212        dev->dev->power.disable_depth++;
3213#endif
3214        if (!amdgpu_device_has_dc_support(adev))
3215                drm_helper_hpd_irq_event(dev);
3216        else
3217                drm_kms_helper_hotplug_event(dev);
3218#ifdef CONFIG_PM
3219        dev->dev->power.disable_depth--;
3220#endif
3221        adev->in_suspend = false;
3222
3223        return 0;
3224}
3225
3226/**
3227 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3228 *
3229 * @adev: amdgpu_device pointer
3230 *
3231 * The list of all the hardware IPs that make up the asic is walked and
3232 * the check_soft_reset callbacks are run.  check_soft_reset determines
3233 * if the asic is still hung or not.
3234 * Returns true if any of the IPs are still in a hung state, false if not.
3235 */
3236static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3237{
3238        int i;
3239        bool asic_hang = false;
3240
3241        if (amdgpu_sriov_vf(adev))
3242                return true;
3243
3244        if (amdgpu_asic_need_full_reset(adev))
3245                return true;
3246
3247        for (i = 0; i < adev->num_ip_blocks; i++) {
3248                if (!adev->ip_blocks[i].status.valid)
3249                        continue;
3250                if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3251                        adev->ip_blocks[i].status.hang =
3252                                adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3253                if (adev->ip_blocks[i].status.hang) {
3254                        DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3255                        asic_hang = true;
3256                }
3257        }
3258        return asic_hang;
3259}
3260
3261/**
3262 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3263 *
3264 * @adev: amdgpu_device pointer
3265 *
3266 * The list of all the hardware IPs that make up the asic is walked and the
3267 * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3268 * handles any IP specific hardware or software state changes that are
3269 * necessary for a soft reset to succeed.
3270 * Returns 0 on success, negative error code on failure.
3271 */
3272static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3273{
3274        int i, r = 0;
3275
3276        for (i = 0; i < adev->num_ip_blocks; i++) {
3277                if (!adev->ip_blocks[i].status.valid)
3278                        continue;
3279                if (adev->ip_blocks[i].status.hang &&
3280                    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3281                        r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3282                        if (r)
3283                                return r;
3284                }
3285        }
3286
3287        return 0;
3288}
3289
3290/**
3291 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3292 *
3293 * @adev: amdgpu_device pointer
3294 *
3295 * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3296 * reset is necessary to recover.
3297 * Returns true if a full asic reset is required, false if not.
3298 */
3299static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3300{
3301        int i;
3302
3303        if (amdgpu_asic_need_full_reset(adev))
3304                return true;
3305
3306        for (i = 0; i < adev->num_ip_blocks; i++) {
3307                if (!adev->ip_blocks[i].status.valid)
3308                        continue;
3309                if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3310                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3311                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3312                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3313                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3314                        if (adev->ip_blocks[i].status.hang) {
3315                                DRM_INFO("Some block need full reset!\n");
3316                                return true;
3317                        }
3318                }
3319        }
3320        return false;
3321}
3322
3323/**
3324 * amdgpu_device_ip_soft_reset - do a soft reset
3325 *
3326 * @adev: amdgpu_device pointer
3327 *
3328 * The list of all the hardware IPs that make up the asic is walked and the
3329 * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3330 * IP specific hardware or software state changes that are necessary to soft
3331 * reset the IP.
3332 * Returns 0 on success, negative error code on failure.
3333 */
3334static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3335{
3336        int i, r = 0;
3337
3338        for (i = 0; i < adev->num_ip_blocks; i++) {
3339                if (!adev->ip_blocks[i].status.valid)
3340                        continue;
3341                if (adev->ip_blocks[i].status.hang &&
3342                    adev->ip_blocks[i].version->funcs->soft_reset) {
3343                        r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3344                        if (r)
3345                                return r;
3346                }
3347        }
3348
3349        return 0;
3350}
3351
3352/**
3353 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3354 *
3355 * @adev: amdgpu_device pointer
3356 *
3357 * The list of all the hardware IPs that make up the asic is walked and the
3358 * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3359 * handles any IP specific hardware or software state changes that are
3360 * necessary after the IP has been soft reset.
3361 * Returns 0 on success, negative error code on failure.
3362 */
3363static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3364{
3365        int i, r = 0;
3366
3367        for (i = 0; i < adev->num_ip_blocks; i++) {
3368                if (!adev->ip_blocks[i].status.valid)
3369                        continue;
3370                if (adev->ip_blocks[i].status.hang &&
3371                    adev->ip_blocks[i].version->funcs->post_soft_reset)
3372                        r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3373                if (r)
3374                        return r;
3375        }
3376
3377        return 0;
3378}
3379
3380/**
3381 * amdgpu_device_recover_vram - Recover some VRAM contents
3382 *
3383 * @adev: amdgpu_device pointer
3384 *
3385 * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3386 * restore things like GPUVM page tables after a GPU reset where
3387 * the contents of VRAM might be lost.
3388 *
3389 * Returns:
3390 * 0 on success, negative error code on failure.
3391 */
3392static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3393{
3394        struct dma_fence *fence = NULL, *next = NULL;
3395        struct amdgpu_bo *shadow;
3396        long r = 1, tmo;
3397
3398        if (amdgpu_sriov_runtime(adev))
3399                tmo = msecs_to_jiffies(8000);
3400        else
3401                tmo = msecs_to_jiffies(100);
3402
3403        DRM_INFO("recover vram bo from shadow start\n");
3404        mutex_lock(&adev->shadow_list_lock);
3405        list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3406
3407                /* No need to recover an evicted BO */
3408                if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3409                    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3410                    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3411                        continue;
3412
3413                r = amdgpu_bo_restore_shadow(shadow, &next);
3414                if (r)
3415                        break;
3416
3417                if (fence) {
3418                        tmo = dma_fence_wait_timeout(fence, false, tmo);
3419                        dma_fence_put(fence);
3420                        fence = next;
3421                        if (tmo == 0) {
3422                                r = -ETIMEDOUT;
3423                                break;
3424                        } else if (tmo < 0) {
3425                                r = tmo;
3426                                break;
3427                        }
3428                } else {
3429                        fence = next;
3430                }
3431        }
3432        mutex_unlock(&adev->shadow_list_lock);
3433
3434        if (fence)
3435                tmo = dma_fence_wait_timeout(fence, false, tmo);
3436        dma_fence_put(fence);
3437
3438        if (r < 0 || tmo <= 0) {
3439                DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3440                return -EIO;
3441        }
3442
3443        DRM_INFO("recover vram bo from shadow done\n");
3444        return 0;
3445}
3446
3447
3448/**
3449 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3450 *
3451 * @adev: amdgpu device pointer
3452 * @from_hypervisor: request from hypervisor
3453 *
3454 * do VF FLR and reinitialize Asic
3455 * return 0 means succeeded otherwise failed
3456 */
3457static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3458                                     bool from_hypervisor)
3459{
3460        int r;
3461
3462        if (from_hypervisor)
3463                r = amdgpu_virt_request_full_gpu(adev, true);
3464        else
3465                r = amdgpu_virt_reset_gpu(adev);
3466        if (r)
3467                return r;
3468
3469        amdgpu_amdkfd_pre_reset(adev);
3470
3471        /* Resume IP prior to SMC */
3472        r = amdgpu_device_ip_reinit_early_sriov(adev);
3473        if (r)
3474                goto error;
3475
3476        /* we need recover gart prior to run SMC/CP/SDMA resume */
3477        amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3478
3479        r = amdgpu_device_fw_loading(adev);
3480        if (r)
3481                return r;
3482
3483        /* now we are okay to resume SMC/CP/SDMA */
3484        r = amdgpu_device_ip_reinit_late_sriov(adev);
3485        if (r)
3486                goto error;
3487
3488        amdgpu_irq_gpu_reset_resume_helper(adev);
3489        r = amdgpu_ib_ring_tests(adev);
3490        amdgpu_amdkfd_post_reset(adev);
3491
3492error:
3493        amdgpu_virt_init_data_exchange(adev);
3494        amdgpu_virt_release_full_gpu(adev, true);
3495        if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3496                amdgpu_inc_vram_lost(adev);
3497                r = amdgpu_device_recover_vram(adev);
3498        }
3499
3500        return r;
3501}
3502
3503/**
3504 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3505 *
3506 * @adev: amdgpu device pointer
3507 *
3508 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3509 * a hung GPU.
3510 */
3511bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3512{
3513        if (!amdgpu_device_ip_check_soft_reset(adev)) {
3514                DRM_INFO("Timeout, but no hardware hang detected.\n");
3515                return false;
3516        }
3517
3518        if (amdgpu_gpu_recovery == 0)
3519                goto disabled;
3520
3521        if (amdgpu_sriov_vf(adev))
3522                return true;
3523
3524        if (amdgpu_gpu_recovery == -1) {
3525                switch (adev->asic_type) {
3526                case CHIP_BONAIRE:
3527                case CHIP_HAWAII:
3528                case CHIP_TOPAZ:
3529                case CHIP_TONGA:
3530                case CHIP_FIJI:
3531                case CHIP_POLARIS10:
3532                case CHIP_POLARIS11:
3533                case CHIP_POLARIS12:
3534                case CHIP_VEGAM:
3535                case CHIP_VEGA20:
3536                case CHIP_VEGA10:
3537                case CHIP_VEGA12:
3538                case CHIP_RAVEN:
3539                        break;
3540                default:
3541                        goto disabled;
3542                }
3543        }
3544
3545        return true;
3546
3547disabled:
3548                DRM_INFO("GPU recovery disabled.\n");
3549                return false;
3550}
3551
3552
3553static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3554                                        struct amdgpu_job *job,
3555                                        bool *need_full_reset_arg)
3556{
3557        int i, r = 0;
3558        bool need_full_reset  = *need_full_reset_arg;
3559
3560        /* block all schedulers and reset given job's ring */
3561        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3562                struct amdgpu_ring *ring = adev->rings[i];
3563
3564                if (!ring || !ring->sched.thread)
3565                        continue;
3566
3567                /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3568                amdgpu_fence_driver_force_completion(ring);
3569        }
3570
3571        if(job)
3572                drm_sched_increase_karma(&job->base);
3573
3574        /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3575        if (!amdgpu_sriov_vf(adev)) {
3576
3577                if (!need_full_reset)
3578                        need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3579
3580                if (!need_full_reset) {
3581                        amdgpu_device_ip_pre_soft_reset(adev);
3582                        r = amdgpu_device_ip_soft_reset(adev);
3583                        amdgpu_device_ip_post_soft_reset(adev);
3584                        if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3585                                DRM_INFO("soft reset failed, will fallback to full reset!\n");
3586                                need_full_reset = true;
3587                        }
3588                }
3589
3590                if (need_full_reset)
3591                        r = amdgpu_device_ip_suspend(adev);
3592
3593                *need_full_reset_arg = need_full_reset;
3594        }
3595
3596        return r;
3597}
3598
/*
 * amdgpu_do_asic_reset - reset and re-initialize every device in a list
 *
 * @hive: XGMI hive the devices belong to, or NULL
 * @device_list_handle: list of devices (linked through gmc.xgmi.head)
 * @need_full_reset_arg: in: whether a full asic reset is to be performed;
 *                       out: set to true when the IB ring tests failed
 *
 * When a full reset is requested, every device in the list is reset
 * (queued as work items for XGMI nodes so they run in parallel, directly
 * otherwise).  Each device is then posted, resumed in two phases around
 * firmware loading, late-initialized and has RAS resumed.  Regardless of
 * the reset type, the IB ring tests are run and VRAM is recovered from the
 * shadow buffers for each device as long as no error is pending.
 *
 * Returns 0 on success, a negative error code on failure, or -EAGAIN when
 * the IB ring tests failed and the caller should retry with a full reset.
 */
static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
                               struct list_head *device_list_handle,
                               bool *need_full_reset_arg)
{
        struct amdgpu_device *tmp_adev = NULL;
        bool need_full_reset = *need_full_reset_arg, vram_lost = false;
        int r = 0;

        /*
         * ASIC reset has to be done on all XGMI hive nodes ASAP
         * to allow proper links negotiation in FW (within 1 sec)
         */
        if (need_full_reset) {
                list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                        /* For XGMI run all resets in parallel to speed up the process */
                        if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
                                /* queue_work() returning false means the work was already queued */
                                if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
                                        r = -EALREADY;
                        } else
                                r = amdgpu_asic_reset(tmp_adev);

                        if (r) {
                                DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
                                         r, tmp_adev->ddev->unique);
                                break;
                        }
                }

                /* For XGMI wait for all PSP resets to complete before proceed */
                if (!r) {
                        list_for_each_entry(tmp_adev, device_list_handle,
                                            gmc.xgmi.head) {
                                if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
                                        flush_work(&tmp_adev->xgmi_reset_work);
                                        /* pick up the result stored by the reset work item */
                                        r = tmp_adev->asic_reset_res;
                                        if (r)
                                                break;
                                }
                        }

                        list_for_each_entry(tmp_adev, device_list_handle,
                                        gmc.xgmi.head) {
                                amdgpu_ras_reserve_bad_pages(tmp_adev);
                        }
                }
        }


        /*
         * Bring every device back up.  Note that r carries over between
         * iterations: once it is non-zero, the remaining devices skip the
         * resume sequence and only have the error recorded at "out".
         */
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                if (need_full_reset) {
                        /* post card */
                        if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
                                DRM_WARN("asic atom init failed!");

                        if (!r) {
                                dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
                                r = amdgpu_device_ip_resume_phase1(tmp_adev);
                                if (r)
                                        goto out;

                                vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
                                if (vram_lost) {
                                        DRM_INFO("VRAM is lost due to GPU reset!\n");
                                        amdgpu_inc_vram_lost(tmp_adev);
                                }

                                r = amdgpu_gtt_mgr_recover(
                                        &tmp_adev->mman.bdev.man[TTM_PL_TT]);
                                if (r)
                                        goto out;

                                /*
                                 * NOTE(review): unlike the other failures in
                                 * this sequence, this one returns directly, so
                                 * asic_reset_res is not recorded and the
                                 * remaining devices are not visited - confirm
                                 * this is intended.
                                 */
                                r = amdgpu_device_fw_loading(tmp_adev);
                                if (r)
                                        return r;

                                r = amdgpu_device_ip_resume_phase2(tmp_adev);
                                if (r)
                                        goto out;

                                if (vram_lost)
                                        amdgpu_device_fill_reset_magic(tmp_adev);

                                /*
                                 * Add this ASIC as tracked as reset was already
                                 * complete successfully.
                                 */
                                amdgpu_register_gpu_instance(tmp_adev);

                                r = amdgpu_device_ip_late_init(tmp_adev);
                                if (r)
                                        goto out;

                                /* must succeed. */
                                amdgpu_ras_resume(tmp_adev);

                                /* Update PSP FW topology after reset */
                                if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
                                        r = amdgpu_xgmi_update_topology(hive, tmp_adev);
                        }
                }


out:
                if (!r) {
                        amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
                        r = amdgpu_ib_ring_tests(tmp_adev);
                        if (r) {
                                dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
                                /*
                                 * The suspend result is overwritten by
                                 * -EAGAIN just below, i.e. it is ignored.
                                 */
                                r = amdgpu_device_ip_suspend(tmp_adev);
                                need_full_reset = true;
                                r = -EAGAIN;
                                goto end;
                        }
                }

                if (!r)
                        r = amdgpu_device_recover_vram(tmp_adev);
                else
                        tmp_adev->asic_reset_res = r;
        }

end:
        /* report the (possibly upgraded) reset type back to the caller */
        *need_full_reset_arg = need_full_reset;
        return r;
}
3724
3725static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
3726{
3727        if (trylock) {
3728                if (!mutex_trylock(&adev->lock_reset))
3729                        return false;
3730        } else
3731                mutex_lock(&adev->lock_reset);
3732
3733        atomic_inc(&adev->gpu_reset_counter);
3734        adev->in_gpu_reset = 1;
3735        switch (amdgpu_asic_reset_method(adev)) {
3736        case AMD_RESET_METHOD_MODE1:
3737                adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3738                break;
3739        case AMD_RESET_METHOD_MODE2:
3740                adev->mp1_state = PP_MP1_STATE_RESET;
3741                break;
3742        default:
3743                adev->mp1_state = PP_MP1_STATE_NONE;
3744                break;
3745        }
3746        /* Block kfd: SRIOV would do it separately */
3747        if (!amdgpu_sriov_vf(adev))
3748                amdgpu_amdkfd_pre_reset(adev);
3749
3750        return true;
3751}
3752
3753static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3754{
3755        /*unlock kfd: SRIOV would do it separately */
3756        if (!amdgpu_sriov_vf(adev))
3757                amdgpu_amdkfd_post_reset(adev);
3758        amdgpu_vf_error_trans_all(adev);
3759        adev->mp1_state = PP_MP1_STATE_NONE;
3760        adev->in_gpu_reset = 0;
3761        mutex_unlock(&adev->lock_reset);
3762}
3763
3764
/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job trigger hang, may be NULL
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic.
 *
 * The sequence is: take the hive and device reset locks, build the list of
 * devices to reset, stop all schedulers, run the pre-reset step on every
 * device, perform the actual reset (SR-IOV or bare metal), then resubmit
 * jobs, restart the schedulers and unlock every device.  If the guilty job
 * turns out to be already signaled, the hardware reset is skipped.
 *
 * Returns 0 for success (or when another reset is already in progress) or
 * an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                              struct amdgpu_job *job)
{
        struct list_head device_list, *device_list_handle =  NULL;
        bool need_full_reset, job_signaled;
        struct amdgpu_hive_info *hive = NULL;
        struct amdgpu_device *tmp_adev = NULL;
        int i, r = 0;

        need_full_reset = job_signaled = false;
        INIT_LIST_HEAD(&device_list);

        dev_info(adev->dev, "GPU reset begin!\n");

        cancel_delayed_work_sync(&adev->delayed_init_work);

        hive = amdgpu_get_xgmi_hive(adev, false);

        /*
         * Here we trylock to avoid chain of resets executing from
         * either trigger by jobs on different adevs in XGMI hive or jobs on
         * different schedulers for same device while this TO handler is running.
         * We always reset all schedulers for device and all devices for XGMI
         * hive so that should take care of them too.
         */

        if (hive && !mutex_trylock(&hive->reset_lock)) {
                DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
                          job ? job->base.id : -1, hive->hive_id);
                return 0;
        }

        /* Start with adev pre asic reset first for soft reset check.*/
        /*
         * The device lock is only a trylock when there is no hive; with a
         * hive the hive->reset_lock taken above already serializes resets,
         * so the device lock is taken unconditionally and cannot fail here.
         */
        if (!amdgpu_device_lock_adev(adev, !hive)) {
                DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
                          job ? job->base.id : -1);
                return 0;
        }

        /* Build list of devices to reset */
        if  (adev->gmc.xgmi.num_physical_nodes > 1) {
                if (!hive) {
                        amdgpu_device_unlock_adev(adev);
                        return -ENODEV;
                }

                /*
                 * In case we are in XGMI hive mode device reset is done for all the
                 * nodes in the hive to retrain all XGMI links and hence the reset
                 * sequence is executed in loop on all nodes.
                 */
                device_list_handle = &hive->device_list;
        } else {
                /* single device: use a private one-entry list */
                list_add_tail(&adev->gmc.xgmi.head, &device_list);
                device_list_handle = &device_list;
        }

        /*
         * Mark these ASICs to be reset as untracked first
         * and add them back after the reset completed
         */
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
                amdgpu_unregister_gpu_instance(tmp_adev);

        /* stop the schedulers of every ring on every device in the list */
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                /* disable ras on ALL IPs */
                if (amdgpu_device_ip_need_full_reset(tmp_adev))
                        amdgpu_ras_suspend(tmp_adev);

                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                        struct amdgpu_ring *ring = tmp_adev->rings[i];

                        if (!ring || !ring->sched.thread)
                                continue;

                        drm_sched_stop(&ring->sched, job ? &job->base : NULL);
                }
        }


        /*
         * Must check guilty signal here since after this point all old
         * HW fences are force signaled.
         *
         * job->base holds a reference to parent fence
         */
        if (job && job->base.s_fence->parent &&
            dma_fence_is_signaled(job->base.s_fence->parent))
                job_signaled = true;

        /*
         * NOTE(review): in the XGMI branch above adev is never added to the
         * local device_list, so when no full reset is needed for a hive
         * member this switches to an empty list and the loops below
         * (including the one that restarts schedulers and unlocks the
         * device) would not visit adev - confirm this is intended.
         */
        if (!amdgpu_device_ip_need_full_reset(adev))
                device_list_handle = &device_list;

        if (job_signaled) {
                dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
                goto skip_hw_reset;
        }


        /* Guilty job will be freed after this*/
        r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
        if (r) {
                /*TODO Should we stop ?*/
                DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
                          r, adev->ddev->unique);
                adev->asic_reset_res = r;
        }

        /*
         * NOTE(review): on the -EAGAIN retry path this loop calls
         * amdgpu_device_lock_adev(tmp_adev, false) again for devices whose
         * lock_reset was already taken on the first pass and is only
         * released in the post-reset loop - looks like it would block;
         * confirm.
         */
retry:  /* Rest of adevs pre asic reset from XGMI hive. */
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

                /* adev itself was locked and prepared above */
                if (tmp_adev == adev)
                        continue;

                amdgpu_device_lock_adev(tmp_adev, false);
                r = amdgpu_device_pre_asic_reset(tmp_adev,
                                                 NULL,
                                                 &need_full_reset);
                /*TODO Should we stop ?*/
                if (r) {
                        DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
                                  r, tmp_adev->ddev->unique);
                        tmp_adev->asic_reset_res = r;
                }
        }

        /* Actual ASIC resets if needed.*/
        /* TODO Implement XGMI hive reset logic for SRIOV */
        if (amdgpu_sriov_vf(adev)) {
                /* no job means the reset was not triggered by a job timeout */
                r = amdgpu_device_reset_sriov(adev, job ? false : true);
                if (r)
                        adev->asic_reset_res = r;
        } else {
                r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
                /* -EAGAIN: IB tests failed, go around again with need_full_reset set */
                if (r && r == -EAGAIN)
                        goto retry;
        }

skip_hw_reset:

        /* Post ASIC reset for all devs .*/
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                        struct amdgpu_ring *ring = tmp_adev->rings[i];

                        if (!ring || !ring->sched.thread)
                                continue;

                        /* No point to resubmit jobs if we didn't HW reset*/
                        if (!tmp_adev->asic_reset_res && !job_signaled)
                                drm_sched_resubmit_jobs(&ring->sched);

                        drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
                }

                if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
                        drm_helper_resume_force_mode(tmp_adev->ddev);
                }

                /* clear the per-device result now that it has been consumed */
                tmp_adev->asic_reset_res = 0;

                /*
                 * NOTE(review): the counter printed below is read from adev,
                 * not tmp_adev, although the message is emitted per device -
                 * confirm which one is meant.
                 */
                if (r) {
                        /* bad news, how to tell it to userspace ? */
                        dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
                        amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
                } else {
                        dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
                }

                amdgpu_device_unlock_adev(tmp_adev);
        }

        if (hive)
                mutex_unlock(&hive->reset_lock);

        if (r)
                dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
        return r;
}
3956
/**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
 */
3966static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
3967{
3968        struct pci_dev *pdev;
3969        enum pci_bus_speed speed_cap, platform_speed_cap;
3970        enum pcie_link_width platform_link_width;
3971
3972        if (amdgpu_pcie_gen_cap)
3973                adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
3974
3975        if (amdgpu_pcie_lane_cap)
3976                adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
3977
3978        /* covers APUs as well */
3979        if (pci_is_root_bus(adev->pdev->bus)) {
3980                if (adev->pm.pcie_gen_mask == 0)
3981                        adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3982                if (adev->pm.pcie_mlw_mask == 0)
3983                        adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
3984                return;
3985        }
3986
3987        if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3988                return;
3989
3990        pcie_bandwidth_available(adev->pdev, NULL,
3991                                 &platform_speed_cap, &platform_link_width);
3992
3993        if (adev->pm.pcie_gen_mask == 0) {
3994                /* asic caps */
3995                pdev = adev->pdev;
3996                speed_cap = pcie_get_speed_cap(pdev);
3997                if (speed_cap == PCI_SPEED_UNKNOWN) {
3998                        adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3999                                                  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4000                                                  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4001                } else {
4002                        if (speed_cap == PCIE_SPEED_16_0GT)
4003                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4004                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4005                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4006                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4007                        else if (speed_cap == PCIE_SPEED_8_0GT)
4008                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4009                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4010                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4011                        else if (speed_cap == PCIE_SPEED_5_0GT)
4012                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4013                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4014                        else
4015                                adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4016                }
4017                /* platform caps */
4018                if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4019                        adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4020                                                   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4021                } else {
4022                        if (platform_speed_cap == PCIE_SPEED_16_0GT)
4023                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4024                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4025                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4026                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4027                        else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4028                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4029                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4030                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4031                        else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4032                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4033                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4034                        else
4035                                adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4036
4037                }
4038        }
4039        if (adev->pm.pcie_mlw_mask == 0) {
4040                if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4041                        adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4042                } else {
4043                        switch (platform_link_width) {
4044                        case PCIE_LNK_X32:
4045                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4046                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4047                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4048                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4049                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4050                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4051                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4052                                break;
4053                        case PCIE_LNK_X16:
4054                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4055                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4056                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4057                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4058                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4059                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4060                                break;
4061                        case PCIE_LNK_X12:
4062                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4063                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4064                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4065                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4066                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4067                                break;
4068                        case PCIE_LNK_X8:
4069                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4070                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4071                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4072                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4073                                break;
4074                        case PCIE_LNK_X4:
4075                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4076                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4077                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4078                                break;
4079                        case PCIE_LNK_X2:
4080                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4081                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4082                                break;
4083                        case PCIE_LNK_X1:
4084                                adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4085                                break;
4086                        default:
4087                                break;
4088                        }
4089                }
4090        }
4091}
4092
4093