linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <linux/kthread.h>
  29#include <linux/console.h>
  30#include <linux/slab.h>
  31#include <drm/drmP.h>
  32#include <drm/drm_crtc_helper.h>
  33#include <drm/drm_atomic_helper.h>
  34#include <drm/amdgpu_drm.h>
  35#include <linux/vgaarb.h>
  36#include <linux/vga_switcheroo.h>
  37#include <linux/efi.h>
  38#include "amdgpu.h"
  39#include "amdgpu_trace.h"
  40#include "amdgpu_i2c.h"
  41#include "atom.h"
  42#include "amdgpu_atombios.h"
  43#include "amdgpu_atomfirmware.h"
  44#include "amd_pcie.h"
  45#ifdef CONFIG_DRM_AMDGPU_SI
  46#include "si.h"
  47#endif
  48#ifdef CONFIG_DRM_AMDGPU_CIK
  49#include "cik.h"
  50#endif
  51#include "vi.h"
  52#include "soc15.h"
  53#include "bif/bif_4_1_d.h"
  54#include <linux/pci.h>
  55#include <linux/firmware.h>
  56#include "amdgpu_vf_error.h"
  57
  58#include "amdgpu_amdkfd.h"
  59#include "amdgpu_pm.h"
  60
/* gpu_info firmware images declared for this module via MODULE_FIRMWARE(). */
  61MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  62MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  63MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  64
/* NOTE(review): presumably a resume-related time in milliseconds; its use is not visible in this part of the file - confirm. */
  65#define AMDGPU_RESUME_MS                2000
  66
/*
 * Human readable ASIC names.
 * NOTE(review): presumably indexed by the asic type enum (entry order must
 * then match that enum, with "LAST" as the terminating entry) - verify
 * against the enum definition and the users of this table.
 */
  67static const char *amdgpu_asic_name[] = {
  68        "TAHITI",
  69        "PITCAIRN",
  70        "VERDE",
  71        "OLAND",
  72        "HAINAN",
  73        "BONAIRE",
  74        "KAVERI",
  75        "KABINI",
  76        "HAWAII",
  77        "MULLINS",
  78        "TOPAZ",
  79        "TONGA",
  80        "FIJI",
  81        "CARRIZO",
  82        "STONEY",
  83        "POLARIS10",
  84        "POLARIS11",
  85        "POLARIS12",
  86        "VEGA10",
  87        "VEGA12",
  88        "RAVEN",
  89        "LAST",
  90};
  91
/* Forward declaration; the definition is not in this part of the file. */
  92static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
  93
  94/**
  95 * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
  96 *
  97 * @dev: drm_device pointer
  98 *
  99 * Returns true if the device is a dGPU with HG/PX power control,
 100 * otherwise return false.
 101 */
 102bool amdgpu_device_is_px(struct drm_device *dev)
 103{
 104        struct amdgpu_device *adev = dev->dev_private;
 105
 106        if (adev->flags & AMD_IS_PX)
 107                return true;
 108        return false;
 109}
 110
 111/*
 112 * MMIO register access helper functions.
 113 */
 114/**
 115 * amdgpu_mm_rreg - read a memory mapped IO register
 116 *
 117 * @adev: amdgpu_device pointer
 118 * @reg: dword aligned register offset
 119 * @acc_flags: access flags which require special behavior
 120 *
 121 * Returns the 32 bit value from the offset specified.
 122 */
 123uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 124                        uint32_t acc_flags)
 125{
 126        uint32_t ret;
 127
 128        if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 129                return amdgpu_virt_kiq_rreg(adev, reg);
 130
 131        if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 132                ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 133        else {
 134                unsigned long flags;
 135
 136                spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 137                writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 138                ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 139                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 140        }
 141        trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
 142        return ret;
 143}
 144
 145/*
 146 * MMIO register read with bytes helper functions
 147 * @offset:bytes offset from MMIO start
 148 *
 149*/
 150
 151/**
 152 * amdgpu_mm_rreg8 - read a memory mapped IO register
 153 *
 154 * @adev: amdgpu_device pointer
 155 * @offset: byte aligned register offset
 156 *
 157 * Returns the 8 bit value from the offset specified.
 158 */
 159uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
 160        if (offset < adev->rmmio_size)
 161                return (readb(adev->rmmio + offset));
 162        BUG();
 163}
 164
 165/*
 166 * MMIO register write with bytes helper functions
 167 * @offset:bytes offset from MMIO start
 168 * @value: the value want to be written to the register
 169 *
 170*/
 171/**
 172 * amdgpu_mm_wreg8 - read a memory mapped IO register
 173 *
 174 * @adev: amdgpu_device pointer
 175 * @offset: byte aligned register offset
 176 * @value: 8 bit value to write
 177 *
 178 * Writes the value specified to the offset specified.
 179 */
 180void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
 181        if (offset < adev->rmmio_size)
 182                writeb(value, adev->rmmio + offset);
 183        else
 184                BUG();
 185}
 186
 187/**
 188 * amdgpu_mm_wreg - write to a memory mapped IO register
 189 *
 190 * @adev: amdgpu_device pointer
 191 * @reg: dword aligned register offset
 192 * @v: 32 bit value to write to the register
 193 * @acc_flags: access flags which require special behavior
 194 *
 195 * Writes the value specified to the offset specified.
 196 */
 197void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 198                    uint32_t acc_flags)
 199{
 200        trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
 201
 202        if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
 203                adev->last_mm_index = v;
 204        }
 205
 206        if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 207                return amdgpu_virt_kiq_wreg(adev, reg, v);
 208
 209        if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 210                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 211        else {
 212                unsigned long flags;
 213
 214                spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 215                writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 216                writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 217                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 218        }
 219
 220        if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
 221                udelay(500);
 222        }
 223}
 224
 225/**
 226 * amdgpu_io_rreg - read an IO register
 227 *
 228 * @adev: amdgpu_device pointer
 229 * @reg: dword aligned register offset
 230 *
 231 * Returns the 32 bit value from the offset specified.
 232 */
 233u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 234{
 235        if ((reg * 4) < adev->rio_mem_size)
 236                return ioread32(adev->rio_mem + (reg * 4));
 237        else {
 238                iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
 239                return ioread32(adev->rio_mem + (mmMM_DATA * 4));
 240        }
 241}
 242
 243/**
 244 * amdgpu_io_wreg - write to an IO register
 245 *
 246 * @adev: amdgpu_device pointer
 247 * @reg: dword aligned register offset
 248 * @v: 32 bit value to write to the register
 249 *
 250 * Writes the value specified to the offset specified.
 251 */
 252void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 253{
 254        if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
 255                adev->last_mm_index = v;
 256        }
 257
 258        if ((reg * 4) < adev->rio_mem_size)
 259                iowrite32(v, adev->rio_mem + (reg * 4));
 260        else {
 261                iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
 262                iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
 263        }
 264
 265        if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
 266                udelay(500);
 267        }
 268}
 269
 270/**
 271 * amdgpu_mm_rdoorbell - read a doorbell dword
 272 *
 273 * @adev: amdgpu_device pointer
 274 * @index: doorbell index
 275 *
 276 * Returns the value in the doorbell aperture at the
 277 * requested doorbell index (CIK).
 278 */
 279u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
 280{
 281        if (index < adev->doorbell.num_doorbells) {
 282                return readl(adev->doorbell.ptr + index);
 283        } else {
 284                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 285                return 0;
 286        }
 287}
 288
 289/**
 290 * amdgpu_mm_wdoorbell - write a doorbell dword
 291 *
 292 * @adev: amdgpu_device pointer
 293 * @index: doorbell index
 294 * @v: value to write
 295 *
 296 * Writes @v to the doorbell aperture at the
 297 * requested doorbell index (CIK).
 298 */
 299void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 300{
 301        if (index < adev->doorbell.num_doorbells) {
 302                writel(v, adev->doorbell.ptr + index);
 303        } else {
 304                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 305        }
 306}
 307
 308/**
 309 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 310 *
 311 * @adev: amdgpu_device pointer
 312 * @index: doorbell index
 313 *
 314 * Returns the value in the doorbell aperture at the
 315 * requested doorbell index (VEGA10+).
 316 */
 317u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
 318{
 319        if (index < adev->doorbell.num_doorbells) {
 320                return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
 321        } else {
 322                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 323                return 0;
 324        }
 325}
 326
 327/**
 328 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 329 *
 330 * @adev: amdgpu_device pointer
 331 * @index: doorbell index
 332 * @v: value to write
 333 *
 334 * Writes @v to the doorbell aperture at the
 335 * requested doorbell index (VEGA10+).
 336 */
 337void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
 338{
 339        if (index < adev->doorbell.num_doorbells) {
 340                atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
 341        } else {
 342                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 343        }
 344}
 345
 346/**
 347 * amdgpu_invalid_rreg - dummy reg read function
 348 *
 349 * @adev: amdgpu device pointer
 350 * @reg: offset of register
 351 *
 352 * Dummy register read function.  Used for register blocks
 353 * that certain asics don't have (all asics).
 354 * Returns the value in the register.
 355 */
 356static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
 357{
 358        DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
 359        BUG();
 360        return 0;
 361}
 362
 363/**
 364 * amdgpu_invalid_wreg - dummy reg write function
 365 *
 366 * @adev: amdgpu device pointer
 367 * @reg: offset of register
 368 * @v: value to write to the register
 369 *
 370 * Dummy register read function.  Used for register blocks
 371 * that certain asics don't have (all asics).
 372 */
 373static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 374{
 375        DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
 376                  reg, v);
 377        BUG();
 378}
 379
 380/**
 381 * amdgpu_block_invalid_rreg - dummy reg read function
 382 *
 383 * @adev: amdgpu device pointer
 384 * @block: offset of instance
 385 * @reg: offset of register
 386 *
 387 * Dummy register read function.  Used for register blocks
 388 * that certain asics don't have (all asics).
 389 * Returns the value in the register.
 390 */
 391static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
 392                                          uint32_t block, uint32_t reg)
 393{
 394        DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
 395                  reg, block);
 396        BUG();
 397        return 0;
 398}
 399
 400/**
 401 * amdgpu_block_invalid_wreg - dummy reg write function
 402 *
 403 * @adev: amdgpu device pointer
 404 * @block: offset of instance
 405 * @reg: offset of register
 406 * @v: value to write to the register
 407 *
 408 * Dummy register read function.  Used for register blocks
 409 * that certain asics don't have (all asics).
 410 */
 411static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 412                                      uint32_t block,
 413                                      uint32_t reg, uint32_t v)
 414{
 415        DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
 416                  reg, block, v);
 417        BUG();
 418}
 419
 420/**
 421 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 422 *
 423 * @adev: amdgpu device pointer
 424 *
 425 * Allocates a scratch page of VRAM for use by various things in the
 426 * driver.
 427 */
 428static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 429{
 430        return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
 431                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
 432                                       &adev->vram_scratch.robj,
 433                                       &adev->vram_scratch.gpu_addr,
 434                                       (void **)&adev->vram_scratch.ptr);
 435}
 436
 437/**
 438 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 439 *
 440 * @adev: amdgpu device pointer
 441 *
 442 * Frees the VRAM scratch page.
 443 */
 444static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
 445{
 446        amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
 447}
 448
 449/**
 450 * amdgpu_device_program_register_sequence - program an array of registers.
 451 *
 452 * @adev: amdgpu_device pointer
 453 * @registers: pointer to the register array
 454 * @array_size: size of the register array
 455 *
 456 * Programs an array or registers with and and or masks.
 457 * This is a helper for setting golden registers.
 458 */
 459void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 460                                             const u32 *registers,
 461                                             const u32 array_size)
 462{
 463        u32 tmp, reg, and_mask, or_mask;
 464        int i;
 465
 466        if (array_size % 3)
 467                return;
 468
 469        for (i = 0; i < array_size; i +=3) {
 470                reg = registers[i + 0];
 471                and_mask = registers[i + 1];
 472                or_mask = registers[i + 2];
 473
 474                if (and_mask == 0xffffffff) {
 475                        tmp = or_mask;
 476                } else {
 477                        tmp = RREG32(reg);
 478                        tmp &= ~and_mask;
 479                        tmp |= or_mask;
 480                }
 481                WREG32(reg, tmp);
 482        }
 483}
 484
 485/**
 486 * amdgpu_device_pci_config_reset - reset the GPU
 487 *
 488 * @adev: amdgpu_device pointer
 489 *
 490 * Resets the GPU using the pci config reset sequence.
 491 * Only applicable to asics prior to vega10.
 492 */
 493void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
 494{
 495        pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
 496}
 497
 498/*
 499 * GPU doorbell aperture helpers function.
 500 */
 501/**
 502 * amdgpu_device_doorbell_init - Init doorbell driver information.
 503 *
 504 * @adev: amdgpu_device pointer
 505 *
 506 * Init doorbell driver information (CIK)
 507 * Returns 0 on success, error on failure.
 508 */
 509static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
 510{
 511        /* No doorbell on SI hardware generation */
 512        if (adev->asic_type < CHIP_BONAIRE) {
 513                adev->doorbell.base = 0;
 514                adev->doorbell.size = 0;
 515                adev->doorbell.num_doorbells = 0;
 516                adev->doorbell.ptr = NULL;
 517                return 0;
 518        }
 519
 520        if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
 521                return -EINVAL;
 522
 523        /* doorbell bar mapping */
 524        adev->doorbell.base = pci_resource_start(adev->pdev, 2);
 525        adev->doorbell.size = pci_resource_len(adev->pdev, 2);
 526
 527        adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
 528                                             AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
 529        if (adev->doorbell.num_doorbells == 0)
 530                return -EINVAL;
 531
 532        adev->doorbell.ptr = ioremap(adev->doorbell.base,
 533                                     adev->doorbell.num_doorbells *
 534                                     sizeof(u32));
 535        if (adev->doorbell.ptr == NULL)
 536                return -ENOMEM;
 537
 538        return 0;
 539}
 540
 541/**
 542 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 543 *
 544 * @adev: amdgpu_device pointer
 545 *
 546 * Tear down doorbell driver information (CIK)
 547 */
 548static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
 549{
 550        iounmap(adev->doorbell.ptr);
 551        adev->doorbell.ptr = NULL;
 552}
 553
 554
 555
 556/*
 557 * amdgpu_device_wb_*()
 558 * Writeback is the method by which the GPU updates special pages in memory
 559 * with the status of certain GPU events (fences, ring pointers,etc.).
 560 */
 561
 562/**
 563 * amdgpu_device_wb_fini - Disable Writeback and free memory
 564 *
 565 * @adev: amdgpu_device pointer
 566 *
 567 * Disables Writeback and frees the Writeback memory (all asics).
 568 * Used at driver shutdown.
 569 */
 570static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
 571{
 572        if (adev->wb.wb_obj) {
 573                amdgpu_bo_free_kernel(&adev->wb.wb_obj,
 574                                      &adev->wb.gpu_addr,
 575                                      (void **)&adev->wb.wb);
 576                adev->wb.wb_obj = NULL;
 577        }
 578}
 579
 580/**
 581 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
 582 *
 583 * @adev: amdgpu_device pointer
 584 *
 585 * Initializes writeback and allocates writeback memory (all asics).
 586 * Used at driver startup.
 587 * Returns 0 on success or an -error on failure.
 588 */
 589static int amdgpu_device_wb_init(struct amdgpu_device *adev)
 590{
 591        int r;
 592
 593        if (adev->wb.wb_obj == NULL) {
 594                /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
 595                r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
 596                                            PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
 597                                            &adev->wb.wb_obj, &adev->wb.gpu_addr,
 598                                            (void **)&adev->wb.wb);
 599                if (r) {
 600                        dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
 601                        return r;
 602                }
 603
 604                adev->wb.num_wb = AMDGPU_MAX_WB;
 605                memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 606
 607                /* clear wb memory */
 608                memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
 609        }
 610
 611        return 0;
 612}
 613
 614/**
 615 * amdgpu_device_wb_get - Allocate a wb entry
 616 *
 617 * @adev: amdgpu_device pointer
 618 * @wb: wb index
 619 *
 620 * Allocate a wb slot for use by the driver (all asics).
 621 * Returns 0 on success or -EINVAL on failure.
 622 */
 623int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
 624{
 625        unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
 626
 627        if (offset < adev->wb.num_wb) {
 628                __set_bit(offset, adev->wb.used);
 629                *wb = offset << 3; /* convert to dw offset */
 630                return 0;
 631        } else {
 632                return -EINVAL;
 633        }
 634}
 635
 636/**
 637 * amdgpu_device_wb_free - Free a wb entry
 638 *
 639 * @adev: amdgpu_device pointer
 640 * @wb: wb index
 641 *
 642 * Free a wb slot allocated for use by the driver (all asics)
 643 */
 644void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 645{
 646        wb >>= 3;
 647        if (wb < adev->wb.num_wb)
 648                __clear_bit(wb, adev->wb.used);
 649}
 650
 651/**
 652 * amdgpu_device_vram_location - try to find VRAM location
 653 *
 654 * @adev: amdgpu device structure holding all necessary informations
 655 * @mc: memory controller structure holding memory informations
 656 * @base: base address at which to put VRAM
 657 *
 658 * Function will try to place VRAM at base address provided
 659 * as parameter.
 660 */
 661void amdgpu_device_vram_location(struct amdgpu_device *adev,
 662                                 struct amdgpu_gmc *mc, u64 base)
 663{
 664        uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
 665
 666        mc->vram_start = base;
 667        mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
 668        if (limit && limit < mc->real_vram_size)
 669                mc->real_vram_size = limit;
 670        dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
 671                        mc->mc_vram_size >> 20, mc->vram_start,
 672                        mc->vram_end, mc->real_vram_size >> 20);
 673}
 674
 675/**
 676 * amdgpu_device_gart_location - try to find GTT location
 677 *
 678 * @adev: amdgpu device structure holding all necessary informations
 679 * @mc: memory controller structure holding memory informations
 680 *
 681 * Function will place try to place GTT before or after VRAM.
 682 *
 683 * If GTT size is bigger than space left then we ajust GTT size.
 684 * Thus function will never fails.
 685 *
 686 * FIXME: when reducing GTT size align new size on power of 2.
 687 */
 688void amdgpu_device_gart_location(struct amdgpu_device *adev,
 689                                 struct amdgpu_gmc *mc)
 690{
 691        u64 size_af, size_bf;
 692
 693        size_af = adev->gmc.mc_mask - mc->vram_end;
 694        size_bf = mc->vram_start;
 695        if (size_bf > size_af) {
 696                if (mc->gart_size > size_bf) {
 697                        dev_warn(adev->dev, "limiting GTT\n");
 698                        mc->gart_size = size_bf;
 699                }
 700                mc->gart_start = 0;
 701        } else {
 702                if (mc->gart_size > size_af) {
 703                        dev_warn(adev->dev, "limiting GTT\n");
 704                        mc->gart_size = size_af;
 705                }
 706                /* VCE doesn't like it when BOs cross a 4GB segment, so align
 707                 * the GART base on a 4GB boundary as well.
 708                 */
 709                mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL);
 710        }
 711        mc->gart_end = mc->gart_start + mc->gart_size - 1;
 712        dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
 713                        mc->gart_size >> 20, mc->gart_start, mc->gart_end);
 714}
 715
 716/**
 717 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 718 *
 719 * @adev: amdgpu_device pointer
 720 *
 721 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 722 * to fail, but if any of the BARs is not accessible after the size we abort
 723 * driver loading by returning -ENODEV.
 724 */
 725int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 726{
 727        u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
 728        u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
 729        struct pci_bus *root;
 730        struct resource *res;
 731        unsigned i;
 732        u16 cmd;
 733        int r;
 734
 735        /* Bypass for VF */
 736        if (amdgpu_sriov_vf(adev))
 737                return 0;
 738
 739        /* Check if the root BUS has 64bit memory resources */
 740        root = adev->pdev->bus;
 741        while (root->parent)
 742                root = root->parent;
 743
 744        pci_bus_for_each_resource(root, res, i) {
 745                if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
 746                    res->start > 0x100000000ull)
 747                        break;
 748        }
 749
 750        /* Trying to resize is pointless without a root hub window above 4GB */
 751        if (!res)
 752                return 0;
 753
 754        /* Disable memory decoding while we change the BAR addresses and size */
 755        pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
 756        pci_write_config_word(adev->pdev, PCI_COMMAND,
 757                              cmd & ~PCI_COMMAND_MEMORY);
 758
 759        /* Free the VRAM and doorbell BAR, we most likely need to move both. */
 760        amdgpu_device_doorbell_fini(adev);
 761        if (adev->asic_type >= CHIP_BONAIRE)
 762                pci_release_resource(adev->pdev, 2);
 763
 764        pci_release_resource(adev->pdev, 0);
 765
 766        r = pci_resize_resource(adev->pdev, 0, rbar_size);
 767        if (r == -ENOSPC)
 768                DRM_INFO("Not enough PCI address space for a large BAR.");
 769        else if (r && r != -ENOTSUPP)
 770                DRM_ERROR("Problem resizing BAR0 (%d).", r);
 771
 772        pci_assign_unassigned_bus_resources(adev->pdev->bus);
 773
 774        /* When the doorbell or fb BAR isn't available we have no chance of
 775         * using the device.
 776         */
 777        r = amdgpu_device_doorbell_init(adev);
 778        if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
 779                return -ENODEV;
 780
 781        pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
 782
 783        return 0;
 784}
 785
 786/*
 787 * GPU helpers function.
 788 */
 789/**
 790 * amdgpu_device_need_post - check if the hw need post or not
 791 *
 792 * @adev: amdgpu_device pointer
 793 *
 794 * Check if the asic has been initialized (all asics) at driver startup
 795 * or post is needed if  hw reset is performed.
 796 * Returns true if need or false if not.
 797 */
 798bool amdgpu_device_need_post(struct amdgpu_device *adev)
 799{
 800        uint32_t reg;
 801
 802        if (amdgpu_sriov_vf(adev))
 803                return false;
 804
 805        if (amdgpu_passthrough(adev)) {
 806                /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
 807                 * some old smc fw still need driver do vPost otherwise gpu hang, while
 808                 * those smc fw version above 22.15 doesn't have this flaw, so we force
 809                 * vpost executed for smc version below 22.15
 810                 */
 811                if (adev->asic_type == CHIP_FIJI) {
 812                        int err;
 813                        uint32_t fw_ver;
 814                        err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
 815                        /* force vPost if error occured */
 816                        if (err)
 817                                return true;
 818
 819                        fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
 820                        if (fw_ver < 0x00160e00)
 821                                return true;
 822                }
 823        }
 824
 825        if (adev->has_hw_reset) {
 826                adev->has_hw_reset = false;
 827                return true;
 828        }
 829
 830        /* bios scratch used on CIK+ */
 831        if (adev->asic_type >= CHIP_BONAIRE)
 832                return amdgpu_atombios_scratch_need_asic_init(adev);
 833
 834        /* check MEM_SIZE for older asics */
 835        reg = amdgpu_asic_get_config_memsize(adev);
 836
 837        if ((reg != 0) && (reg != 0xffffffff))
 838                return false;
 839
 840        return true;
 841}
 842
 843/* if we get transitioned to only one device, take VGA back */
 844/**
 845 * amdgpu_device_vga_set_decode - enable/disable vga decode
 846 *
 847 * @cookie: amdgpu_device pointer
 848 * @state: enable/disable vga decode
 849 *
 850 * Enable/disable vga decode (all asics).
 851 * Returns VGA resource flags.
 852 */
 853static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
 854{
 855        struct amdgpu_device *adev = cookie;
 856        amdgpu_asic_set_vga_state(adev, state);
 857        if (state)
 858                return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
 859                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 860        else
 861                return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 862}
 863
 864/**
 865 * amdgpu_device_check_block_size - validate the vm block size
 866 *
 867 * @adev: amdgpu_device pointer
 868 *
 869 * Validates the vm block size specified via module parameter.
 870 * The vm block size defines number of bits in page table versus page directory,
 871 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 872 * page table and the remaining bits are in the page directory.
 873 */
 874static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
 875{
 876        /* defines number of bits in page table versus page directory,
 877         * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 878         * page table and the remaining bits are in the page directory */
 879        if (amdgpu_vm_block_size == -1)
 880                return;
 881
 882        if (amdgpu_vm_block_size < 9) {
 883                dev_warn(adev->dev, "VM page table size (%d) too small\n",
 884                         amdgpu_vm_block_size);
 885                amdgpu_vm_block_size = -1;
 886        }
 887}
 888
 889/**
 890 * amdgpu_device_check_vm_size - validate the vm size
 891 *
 892 * @adev: amdgpu_device pointer
 893 *
 894 * Validates the vm size in GB specified via module parameter.
 895 * The VM size is the size of the GPU virtual memory space in GB.
 896 */
 897static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
 898{
 899        /* no need to check the default value */
 900        if (amdgpu_vm_size == -1)
 901                return;
 902
 903        if (amdgpu_vm_size < 1) {
 904                dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
 905                         amdgpu_vm_size);
 906                amdgpu_vm_size = -1;
 907        }
 908}
 909
 910/**
 911 * amdgpu_device_check_arguments - validate module params
 912 *
 913 * @adev: amdgpu_device pointer
 914 *
 915 * Validates certain module parameters and updates
 916 * the associated values used by the driver (all asics).
 917 */
 918static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
 919{
 920        if (amdgpu_sched_jobs < 4) {
 921                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
 922                         amdgpu_sched_jobs);
 923                amdgpu_sched_jobs = 4;
 924        } else if (!is_power_of_2(amdgpu_sched_jobs)){
 925                dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
 926                         amdgpu_sched_jobs);
 927                amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
 928        }
 929
 930        if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
 931                /* gart size must be greater or equal to 32M */
 932                dev_warn(adev->dev, "gart size (%d) too small\n",
 933                         amdgpu_gart_size);
 934                amdgpu_gart_size = -1;
 935        }
 936
 937        if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
 938                /* gtt size must be greater or equal to 32M */
 939                dev_warn(adev->dev, "gtt size (%d) too small\n",
 940                                 amdgpu_gtt_size);
 941                amdgpu_gtt_size = -1;
 942        }
 943
 944        /* valid range is between 4 and 9 inclusive */
 945        if (amdgpu_vm_fragment_size != -1 &&
 946            (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
 947                dev_warn(adev->dev, "valid range is between 4 and 9\n");
 948                amdgpu_vm_fragment_size = -1;
 949        }
 950
 951        amdgpu_device_check_vm_size(adev);
 952
 953        amdgpu_device_check_block_size(adev);
 954
 955        if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
 956            !is_power_of_2(amdgpu_vram_page_split))) {
 957                dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
 958                         amdgpu_vram_page_split);
 959                amdgpu_vram_page_split = 1024;
 960        }
 961
 962        if (amdgpu_lockup_timeout == 0) {
 963                dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
 964                amdgpu_lockup_timeout = 10000;
 965        }
 966
 967        adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 968}
 969
 970/**
 971 * amdgpu_switcheroo_set_state - set switcheroo state
 972 *
 973 * @pdev: pci dev pointer
 974 * @state: vga_switcheroo state
 975 *
 976 * Callback for the switcheroo driver.  Suspends or resumes the
 977 * the asics before or after it is powered up using ACPI methods.
 978 */
 979static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
 980{
 981        struct drm_device *dev = pci_get_drvdata(pdev);
 982
 983        if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
 984                return;
 985
 986        if (state == VGA_SWITCHEROO_ON) {
 987                pr_info("amdgpu: switched on\n");
 988                /* don't suspend or resume card normally */
 989                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 990
 991                amdgpu_device_resume(dev, true, true);
 992
 993                dev->switch_power_state = DRM_SWITCH_POWER_ON;
 994                drm_kms_helper_poll_enable(dev);
 995        } else {
 996                pr_info("amdgpu: switched off\n");
 997                drm_kms_helper_poll_disable(dev);
 998                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 999                amdgpu_device_suspend(dev, true, true);
1000                dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1001        }
1002}
1003
1004/**
1005 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1006 *
1007 * @pdev: pci dev pointer
1008 *
1009 * Callback for the switcheroo driver.  Check of the switcheroo
1010 * state can be changed.
1011 * Returns true if the state can be changed, false if not.
1012 */
1013static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1014{
1015        struct drm_device *dev = pci_get_drvdata(pdev);
1016
1017        /*
1018        * FIXME: open_count is protected by drm_global_mutex but that would lead to
1019        * locking inversion with the driver load path. And the access here is
1020        * completely racy anyway. So don't bother with locking for now.
1021        */
1022        return dev->open_count == 0;
1023}
1024
1025static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1026        .set_gpu_state = amdgpu_switcheroo_set_state,
1027        .reprobe = NULL,
1028        .can_switch = amdgpu_switcheroo_can_switch,
1029};
1030
1031/**
1032 * amdgpu_device_ip_set_clockgating_state - set the CG state
1033 *
1034 * @adev: amdgpu_device pointer
1035 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1036 * @state: clockgating state (gate or ungate)
1037 *
1038 * Sets the requested clockgating state for all instances of
1039 * the hardware IP specified.
1040 * Returns the error code from the last instance.
1041 */
1042int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
1043                                           enum amd_ip_block_type block_type,
1044                                           enum amd_clockgating_state state)
1045{
1046        int i, r = 0;
1047
1048        for (i = 0; i < adev->num_ip_blocks; i++) {
1049                if (!adev->ip_blocks[i].status.valid)
1050                        continue;
1051                if (adev->ip_blocks[i].version->type != block_type)
1052                        continue;
1053                if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1054                        continue;
1055                r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1056                        (void *)adev, state);
1057                if (r)
1058                        DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1059                                  adev->ip_blocks[i].version->funcs->name, r);
1060        }
1061        return r;
1062}
1063
1064/**
1065 * amdgpu_device_ip_set_powergating_state - set the PG state
1066 *
1067 * @adev: amdgpu_device pointer
1068 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1069 * @state: powergating state (gate or ungate)
1070 *
1071 * Sets the requested powergating state for all instances of
1072 * the hardware IP specified.
1073 * Returns the error code from the last instance.
1074 */
1075int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
1076                                           enum amd_ip_block_type block_type,
1077                                           enum amd_powergating_state state)
1078{
1079        int i, r = 0;
1080
1081        for (i = 0; i < adev->num_ip_blocks; i++) {
1082                if (!adev->ip_blocks[i].status.valid)
1083                        continue;
1084                if (adev->ip_blocks[i].version->type != block_type)
1085                        continue;
1086                if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1087                        continue;
1088                r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1089                        (void *)adev, state);
1090                if (r)
1091                        DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1092                                  adev->ip_blocks[i].version->funcs->name, r);
1093        }
1094        return r;
1095}
1096
1097/**
1098 * amdgpu_device_ip_get_clockgating_state - get the CG state
1099 *
1100 * @adev: amdgpu_device pointer
1101 * @flags: clockgating feature flags
1102 *
1103 * Walks the list of IPs on the device and updates the clockgating
1104 * flags for each IP.
1105 * Updates @flags with the feature flags for each hardware IP where
1106 * clockgating is enabled.
1107 */
1108void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1109                                            u32 *flags)
1110{
1111        int i;
1112
1113        for (i = 0; i < adev->num_ip_blocks; i++) {
1114                if (!adev->ip_blocks[i].status.valid)
1115                        continue;
1116                if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1117                        adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1118        }
1119}
1120
1121/**
1122 * amdgpu_device_ip_wait_for_idle - wait for idle
1123 *
1124 * @adev: amdgpu_device pointer
1125 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1126 *
1127 * Waits for the request hardware IP to be idle.
1128 * Returns 0 for success or a negative error code on failure.
1129 */
1130int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1131                                   enum amd_ip_block_type block_type)
1132{
1133        int i, r;
1134
1135        for (i = 0; i < adev->num_ip_blocks; i++) {
1136                if (!adev->ip_blocks[i].status.valid)
1137                        continue;
1138                if (adev->ip_blocks[i].version->type == block_type) {
1139                        r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1140                        if (r)
1141                                return r;
1142                        break;
1143                }
1144        }
1145        return 0;
1146
1147}
1148
1149/**
1150 * amdgpu_device_ip_is_idle - is the hardware IP idle
1151 *
1152 * @adev: amdgpu_device pointer
1153 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1154 *
1155 * Check if the hardware IP is idle or not.
1156 * Returns true if it the IP is idle, false if not.
1157 */
1158bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1159                              enum amd_ip_block_type block_type)
1160{
1161        int i;
1162
1163        for (i = 0; i < adev->num_ip_blocks; i++) {
1164                if (!adev->ip_blocks[i].status.valid)
1165                        continue;
1166                if (adev->ip_blocks[i].version->type == block_type)
1167                        return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1168        }
1169        return true;
1170
1171}
1172
1173/**
1174 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1175 *
1176 * @adev: amdgpu_device pointer
1177 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1178 *
1179 * Returns a pointer to the hardware IP block structure
1180 * if it exists for the asic, otherwise NULL.
1181 */
1182struct amdgpu_ip_block *
1183amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1184                              enum amd_ip_block_type type)
1185{
1186        int i;
1187
1188        for (i = 0; i < adev->num_ip_blocks; i++)
1189                if (adev->ip_blocks[i].version->type == type)
1190                        return &adev->ip_blocks[i];
1191
1192        return NULL;
1193}
1194
1195/**
1196 * amdgpu_device_ip_block_version_cmp
1197 *
1198 * @adev: amdgpu_device pointer
1199 * @type: enum amd_ip_block_type
1200 * @major: major version
1201 * @minor: minor version
1202 *
1203 * return 0 if equal or greater
1204 * return 1 if smaller or the ip_block doesn't exist
1205 */
1206int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1207                                       enum amd_ip_block_type type,
1208                                       u32 major, u32 minor)
1209{
1210        struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1211
1212        if (ip_block && ((ip_block->version->major > major) ||
1213                        ((ip_block->version->major == major) &&
1214                        (ip_block->version->minor >= minor))))
1215                return 0;
1216
1217        return 1;
1218}
1219
1220/**
1221 * amdgpu_device_ip_block_add
1222 *
1223 * @adev: amdgpu_device pointer
1224 * @ip_block_version: pointer to the IP to add
1225 *
1226 * Adds the IP block driver information to the collection of IPs
1227 * on the asic.
1228 */
1229int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1230                               const struct amdgpu_ip_block_version *ip_block_version)
1231{
1232        if (!ip_block_version)
1233                return -EINVAL;
1234
1235        DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1236                  ip_block_version->funcs->name);
1237
1238        adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1239
1240        return 0;
1241}
1242
1243/**
1244 * amdgpu_device_enable_virtual_display - enable virtual display feature
1245 *
1246 * @adev: amdgpu_device pointer
1247 *
1248 * Enabled the virtual display feature if the user has enabled it via
1249 * the module parameter virtual_display.  This feature provides a virtual
1250 * display hardware on headless boards or in virtualized environments.
1251 * This function parses and validates the configuration string specified by
1252 * the user and configues the virtual display configuration (number of
1253 * virtual connectors, crtcs, etc.) specified.
1254 */
1255static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1256{
1257        adev->enable_virtual_display = false;
1258
1259        if (amdgpu_virtual_display) {
1260                struct drm_device *ddev = adev->ddev;
1261                const char *pci_address_name = pci_name(ddev->pdev);
1262                char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1263
1264                pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1265                pciaddstr_tmp = pciaddstr;
1266                while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1267                        pciaddname = strsep(&pciaddname_tmp, ",");
1268                        if (!strcmp("all", pciaddname)
1269                            || !strcmp(pci_address_name, pciaddname)) {
1270                                long num_crtc;
1271                                int res = -1;
1272
1273                                adev->enable_virtual_display = true;
1274
1275                                if (pciaddname_tmp)
1276                                        res = kstrtol(pciaddname_tmp, 10,
1277                                                      &num_crtc);
1278
1279                                if (!res) {
1280                                        if (num_crtc < 1)
1281                                                num_crtc = 1;
1282                                        if (num_crtc > 6)
1283                                                num_crtc = 6;
1284                                        adev->mode_info.num_crtc = num_crtc;
1285                                } else {
1286                                        adev->mode_info.num_crtc = 1;
1287                                }
1288                                break;
1289                        }
1290                }
1291
1292                DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1293                         amdgpu_virtual_display, pci_address_name,
1294                         adev->enable_virtual_display, adev->mode_info.num_crtc);
1295
1296                kfree(pciaddstr);
1297        }
1298}
1299
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.  Only VEGA10, VEGA12 and RAVEN load a gpu_info image; every
 * other asic type returns 0 right away.
 * Returns 0 on success, the error code from request_firmware() or
 * amdgpu_ucode_validate() if loading or validating the image fails, or
 * -EINVAL if the major version of the gpu_info table is not supported.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	/* asics listed before "default" have no gpu_info firmware to load */
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS11:
	case CHIP_POLARIS10:
	case CHIP_POLARIS12:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	/*
	 * NOTE(review): from here on the error paths leave the loaded image in
	 * adev->firmware.gpu_info_fw; presumably it is released by a teardown
	 * path outside this function - confirm.
	 */
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	/* NOTE(review): version_major is read without an endian conversion - confirm */
	switch (hdr->version_major) {
	case 1:
	{
		/* the v1.0 table starts ucode_array_offset_bytes into the image */
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		/* the message prints header.ucode_version, not the version_major tested above */
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}
1410
/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs.  The asic family is set from
 * the asic type, the IP blocks that make up the asic are registered, and
 * each enabled IP's early_init callback is run.  This is the first stage
 * in initializing the asic.
 * Returns 0 on success, negative error code on failure (-EINVAL for an
 * unsupported asic type, -EAGAIN if full GPU access cannot be obtained
 * under SR-IOV).
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	/* pick the family and register the IP blocks for this asic */
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS11:
	case CHIP_POLARIS10:
	case CHIP_POLARIS12:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	/* any failure to get full GPU access is reported to the caller as -EAGAIN */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	for (i = 0; i < adev->num_ip_blocks; i++) {
		/* a cleared bit i in the ip_block_mask parameter disables block i */
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				/* -ENOENT only marks the block invalid; other errors abort */
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				/* blocks without an early_init callback are valid as is */
				adev->ip_blocks[i].status.valid = true;
			}
		}
	}

	/* restrict clock/power gating features to what the module params allow */
	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}
1530
/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run.  sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.  The GMC
 * block gets its hw_init during the sw_init pass; all other blocks are
 * hw-initialized in a second pass.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	/* first pass: sw_init for every valid block (plus early GMC hw_init) */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				return r;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				return r;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				return r;
			}
			/* marked only after wb init so the second pass skips this block */
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					return r;
				}
			}
		}
	}

	/* second pass: hw_init for every sw-initialized block not yet brought up */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	amdgpu_amdkfd_device_init(adev);

	/*
	 * NOTE(review): the early error returns above do not reach this
	 * release; presumably the caller's failure path handles it - confirm.
	 */
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return 0;
}
1608
1609/**
1610 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1611 *
1612 * @adev: amdgpu_device pointer
1613 *
1614 * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
1615 * this function before a GPU reset.  If the value is retained after a
1616 * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
1617 */
1618static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
1619{
1620        memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1621}
1622
1623/**
1624 * amdgpu_device_check_vram_lost - check if vram is valid
1625 *
1626 * @adev: amdgpu_device pointer
1627 *
1628 * Checks the reset magic value written to the gart pointer in VRAM.
1629 * The driver calls this after a GPU reset to see if the contents of
1630 * VRAM is lost or now.
1631 * returns true if vram is lost, false if not.
1632 */
1633static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
1634{
1635        return !!memcmp(adev->gart.ptr, adev->reset_magic,
1636                        AMDGPU_RESET_MAGIC_NUM);
1637}
1638
1639/**
1640 * amdgpu_device_ip_late_set_cg_state - late init for clockgating
1641 *
1642 * @adev: amdgpu_device pointer
1643 *
1644 * Late initialization pass enabling clockgating for hardware IPs.
1645 * The list of all the hardware IPs that make up the asic is walked and the
1646 * set_clockgating_state callbacks are run.  This stage is run late
1647 * in the init process.
1648 * Returns 0 on success, negative error code on failure.
1649 */
1650static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
1651{
1652        int i = 0, r;
1653
1654        if (amdgpu_emu_mode == 1)
1655                return 0;
1656
1657        for (i = 0; i < adev->num_ip_blocks; i++) {
1658                if (!adev->ip_blocks[i].status.valid)
1659                        continue;
1660                /* skip CG for VCE/UVD, it's handled specially */
1661                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1662                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1663                    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1664                        /* enable clockgating to save power */
1665                        r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1666                                                                                     AMD_CG_STATE_GATE);
1667                        if (r) {
1668                                DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
1669                                          adev->ip_blocks[i].version->funcs->name, r);
1670                                return r;
1671                        }
1672                }
1673        }
1674        return 0;
1675}
1676
1677/**
1678 * amdgpu_device_ip_late_init - run late init for hardware IPs
1679 *
1680 * @adev: amdgpu_device pointer
1681 *
1682 * Late initialization pass for hardware IPs.  The list of all the hardware
1683 * IPs that make up the asic is walked and the late_init callbacks are run.
1684 * late_init covers any special initialization that an IP requires
1685 * after all of the have been initialized or something that needs to happen
1686 * late in the init process.
1687 * Returns 0 on success, negative error code on failure.
1688 */
1689static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
1690{
1691        int i = 0, r;
1692
1693        for (i = 0; i < adev->num_ip_blocks; i++) {
1694                if (!adev->ip_blocks[i].status.valid)
1695                        continue;
1696                if (adev->ip_blocks[i].version->funcs->late_init) {
1697                        r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1698                        if (r) {
1699                                DRM_ERROR("late_init of IP block <%s> failed %d\n",
1700                                          adev->ip_blocks[i].version->funcs->name, r);
1701                                return r;
1702                        }
1703                        adev->ip_blocks[i].status.late_initialized = true;
1704                }
1705        }
1706
1707        mod_delayed_work(system_wq, &adev->late_init_work,
1708                        msecs_to_jiffies(AMDGPU_RESUME_MS));
1709
1710        amdgpu_device_fill_reset_magic(adev);
1711
1712        return 0;
1713}
1714
1715/**
1716 * amdgpu_device_ip_fini - run fini for hardware IPs
1717 *
1718 * @adev: amdgpu_device pointer
1719 *
1720 * Main teardown pass for hardware IPs.  The list of all the hardware
1721 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1722 * are run.  hw_fini tears down the hardware associated with each IP
1723 * and sw_fini tears down any software state associated with each IP.
1724 * Returns 0 on success, negative error code on failure.
1725 */
1726static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1727{
1728        int i, r;
1729
1730        amdgpu_amdkfd_device_fini(adev);
1731        /* need to disable SMC first */
1732        for (i = 0; i < adev->num_ip_blocks; i++) {
1733                if (!adev->ip_blocks[i].status.hw)
1734                        continue;
1735                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
1736                        adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1737                        /* ungate blocks before hw fini so that we can shutdown the blocks safely */
1738                        r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1739                                                                                     AMD_CG_STATE_UNGATE);
1740                        if (r) {
1741                                DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
1742                                          adev->ip_blocks[i].version->funcs->name, r);
1743                                return r;
1744                        }
1745                        r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
1746                        /* XXX handle errors */
1747                        if (r) {
1748                                DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1749                                          adev->ip_blocks[i].version->funcs->name, r);
1750                        }
1751                        adev->ip_blocks[i].status.hw = false;
1752                        break;
1753                }
1754        }
1755
1756        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1757                if (!adev->ip_blocks[i].status.hw)
1758                        continue;
1759
1760                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1761                        adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1762                        adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1763                        /* ungate blocks before hw fini so that we can shutdown the blocks safely */
1764                        r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1765                                                                                     AMD_CG_STATE_UNGATE);
1766                        if (r) {
1767                                DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
1768                                          adev->ip_blocks[i].version->funcs->name, r);
1769                                return r;
1770                        }
1771                }
1772
1773                r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
1774                /* XXX handle errors */
1775                if (r) {
1776                        DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1777                                  adev->ip_blocks[i].version->funcs->name, r);
1778                }
1779
1780                adev->ip_blocks[i].status.hw = false;
1781        }
1782
1783
1784        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1785                if (!adev->ip_blocks[i].status.sw)
1786                        continue;
1787
1788                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1789                        amdgpu_free_static_csa(adev);
1790                        amdgpu_device_wb_fini(adev);
1791                        amdgpu_device_vram_scratch_fini(adev);
1792                }
1793
1794                r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
1795                /* XXX handle errors */
1796                if (r) {
1797                        DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1798                                  adev->ip_blocks[i].version->funcs->name, r);
1799                }
1800                adev->ip_blocks[i].status.sw = false;
1801                adev->ip_blocks[i].status.valid = false;
1802        }
1803
1804        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1805                if (!adev->ip_blocks[i].status.late_initialized)
1806                        continue;
1807                if (adev->ip_blocks[i].version->funcs->late_fini)
1808                        adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1809                adev->ip_blocks[i].status.late_initialized = false;
1810        }
1811
1812        if (amdgpu_sriov_vf(adev))
1813                if (amdgpu_virt_release_full_gpu(adev, false))
1814                        DRM_ERROR("failed to release exclusive mode on fini\n");
1815
1816        return 0;
1817}
1818
1819/**
1820 * amdgpu_device_ip_late_init_func_handler - work handler for clockgating
1821 *
1822 * @work: work_struct
1823 *
1824 * Work handler for amdgpu_device_ip_late_set_cg_state.  We put the
1825 * clockgating setup into a worker thread to speed up driver init and
1826 * resume from suspend.
1827 */
1828static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
1829{
1830        struct amdgpu_device *adev =
1831                container_of(work, struct amdgpu_device, late_init_work.work);
1832        amdgpu_device_ip_late_set_cg_state(adev);
1833}
1834
1835/**
1836 * amdgpu_device_ip_suspend - run suspend for hardware IPs
1837 *
1838 * @adev: amdgpu_device pointer
1839 *
1840 * Main suspend function for hardware IPs.  The list of all the hardware
1841 * IPs that make up the asic is walked, clockgating is disabled and the
1842 * suspend callbacks are run.  suspend puts the hardware and software state
1843 * in each IP into a state suitable for suspend.
1844 * Returns 0 on success, negative error code on failure.
1845 */
1846int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
1847{
1848        int i, r;
1849
1850        if (amdgpu_sriov_vf(adev))
1851                amdgpu_virt_request_full_gpu(adev, false);
1852
1853        /* ungate SMC block first */
1854        r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
1855                                                   AMD_CG_STATE_UNGATE);
1856        if (r) {
1857                DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);
1858        }
1859
1860        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1861                if (!adev->ip_blocks[i].status.valid)
1862                        continue;
1863                /* ungate blocks so that suspend can properly shut them down */
1864                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
1865                        adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1866                        r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1867                                                                                     AMD_CG_STATE_UNGATE);
1868                        if (r) {
1869                                DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
1870                                          adev->ip_blocks[i].version->funcs->name, r);
1871                        }
1872                }
1873                /* XXX handle errors */
1874                r = adev->ip_blocks[i].version->funcs->suspend(adev);
1875                /* XXX handle errors */
1876                if (r) {
1877                        DRM_ERROR("suspend of IP block <%s> failed %d\n",
1878                                  adev->ip_blocks[i].version->funcs->name, r);
1879                }
1880        }
1881
1882        if (amdgpu_sriov_vf(adev))
1883                amdgpu_virt_release_full_gpu(adev, false);
1884
1885        return 0;
1886}
1887
1888static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
1889{
1890        int i, r;
1891
1892        static enum amd_ip_block_type ip_order[] = {
1893                AMD_IP_BLOCK_TYPE_GMC,
1894                AMD_IP_BLOCK_TYPE_COMMON,
1895                AMD_IP_BLOCK_TYPE_IH,
1896        };
1897
1898        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
1899                int j;
1900                struct amdgpu_ip_block *block;
1901
1902                for (j = 0; j < adev->num_ip_blocks; j++) {
1903                        block = &adev->ip_blocks[j];
1904
1905                        if (block->version->type != ip_order[i] ||
1906                                !block->status.valid)
1907                                continue;
1908
1909                        r = block->version->funcs->hw_init(adev);
1910                        DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
1911                        if (r)
1912                                return r;
1913                }
1914        }
1915
1916        return 0;
1917}
1918
1919static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
1920{
1921        int i, r;
1922
1923        static enum amd_ip_block_type ip_order[] = {
1924                AMD_IP_BLOCK_TYPE_SMC,
1925                AMD_IP_BLOCK_TYPE_PSP,
1926                AMD_IP_BLOCK_TYPE_DCE,
1927                AMD_IP_BLOCK_TYPE_GFX,
1928                AMD_IP_BLOCK_TYPE_SDMA,
1929                AMD_IP_BLOCK_TYPE_UVD,
1930                AMD_IP_BLOCK_TYPE_VCE
1931        };
1932
1933        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
1934                int j;
1935                struct amdgpu_ip_block *block;
1936
1937                for (j = 0; j < adev->num_ip_blocks; j++) {
1938                        block = &adev->ip_blocks[j];
1939
1940                        if (block->version->type != ip_order[i] ||
1941                                !block->status.valid)
1942                                continue;
1943
1944                        r = block->version->funcs->hw_init(adev);
1945                        DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
1946                        if (r)
1947                                return r;
1948                }
1949        }
1950
1951        return 0;
1952}
1953
1954/**
1955 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
1956 *
1957 * @adev: amdgpu_device pointer
1958 *
1959 * First resume function for hardware IPs.  The list of all the hardware
1960 * IPs that make up the asic is walked and the resume callbacks are run for
1961 * COMMON, GMC, and IH.  resume puts the hardware into a functional state
1962 * after a suspend and updates the software state as necessary.  This
1963 * function is also used for restoring the GPU after a GPU reset.
1964 * Returns 0 on success, negative error code on failure.
1965 */
1966static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
1967{
1968        int i, r;
1969
1970        for (i = 0; i < adev->num_ip_blocks; i++) {
1971                if (!adev->ip_blocks[i].status.valid)
1972                        continue;
1973                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1974                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
1975                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1976                        r = adev->ip_blocks[i].version->funcs->resume(adev);
1977                        if (r) {
1978                                DRM_ERROR("resume of IP block <%s> failed %d\n",
1979                                          adev->ip_blocks[i].version->funcs->name, r);
1980                                return r;
1981                        }
1982                }
1983        }
1984
1985        return 0;
1986}
1987
1988/**
1989 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
1990 *
1991 * @adev: amdgpu_device pointer
1992 *
1993 * First resume function for hardware IPs.  The list of all the hardware
1994 * IPs that make up the asic is walked and the resume callbacks are run for
1995 * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
1996 * functional state after a suspend and updates the software state as
1997 * necessary.  This function is also used for restoring the GPU after a GPU
1998 * reset.
1999 * Returns 0 on success, negative error code on failure.
2000 */
2001static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2002{
2003        int i, r;
2004
2005        for (i = 0; i < adev->num_ip_blocks; i++) {
2006                if (!adev->ip_blocks[i].status.valid)
2007                        continue;
2008                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2009                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2010                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
2011                        continue;
2012                r = adev->ip_blocks[i].version->funcs->resume(adev);
2013                if (r) {
2014                        DRM_ERROR("resume of IP block <%s> failed %d\n",
2015                                  adev->ip_blocks[i].version->funcs->name, r);
2016                        return r;
2017                }
2018        }
2019
2020        return 0;
2021}
2022
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  The hardware IPs are split into
 * two resume functions because they are also used in recovering from a GPU
 * reset, where some additional steps need to be taken between them.  In
 * this case (S3/S4) they are run sequentially; phase 2 is skipped if
 * phase 1 fails.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int ret = amdgpu_device_ip_resume_phase1(adev);

	if (ret)
		return ret;

	return amdgpu_device_ip_resume_phase2(adev);
}
2046
2047/**
2048 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2049 *
2050 * @adev: amdgpu_device pointer
2051 *
2052 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2053 */
2054static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2055{
2056        if (amdgpu_sriov_vf(adev)) {
2057                if (adev->is_atom_fw) {
2058                        if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2059                                adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2060                } else {
2061                        if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2062                                adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2063                }
2064
2065                if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2066                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2067        }
2068}
2069
2070/**
2071 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2072 *
2073 * @asic_type: AMD asic type
2074 *
2075 * Check if there is DC (new modesetting infrastructre) support for an asic.
2076 * returns true if DC has support, false if not.
2077 */
2078bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2079{
2080        switch (asic_type) {
2081#if defined(CONFIG_DRM_AMD_DC)
2082        case CHIP_BONAIRE:
2083        case CHIP_HAWAII:
2084        case CHIP_KAVERI:
2085        case CHIP_KABINI:
2086        case CHIP_MULLINS:
2087        case CHIP_CARRIZO:
2088        case CHIP_STONEY:
2089        case CHIP_POLARIS11:
2090        case CHIP_POLARIS10:
2091        case CHIP_POLARIS12:
2092        case CHIP_TONGA:
2093        case CHIP_FIJI:
2094#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
2095                return amdgpu_dc != 0;
2096#endif
2097        case CHIP_VEGA10:
2098        case CHIP_VEGA12:
2099#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
2100        case CHIP_RAVEN:
2101#endif
2102                return amdgpu_dc != 0;
2103#endif
2104        default:
2105                return false;
2106        }
2107}
2108
2109/**
2110 * amdgpu_device_has_dc_support - check if dc is supported
2111 *
2112 * @adev: amdgpu_device_pointer
2113 *
2114 * Returns true for supported, false for not supported
2115 */
2116bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2117{
2118        if (amdgpu_sriov_vf(adev))
2119                return false;
2120
2121        return amdgpu_device_asic_has_dc_support(adev->asic_type);
2122}
2123
2124/**
2125 * amdgpu_device_init - initialize the driver
2126 *
2127 * @adev: amdgpu_device pointer
2128 * @ddev: drm dev pointer
2129 * @pdev: pci dev pointer
2130 * @flags: driver flags
2131 *
2132 * Initializes the driver info and hw (all asics).
     * In order: software defaults, locks and lists are set up, the register
     * BAR is mapped, IP early_init runs, VGA/switcheroo clients are registered,
     * the VBIOS is read and the card posted if needed (skipped in emulation
     * mode), then the fence driver, IP init, IB pool, fbdev, sysfs/debugfs and
     * finally IP late_init are brought up.
2133 * Returns 0 for success or an error on failure.
     * NOTE(review): the error paths here do not unmap rmmio or undo earlier
     * steps; presumably the caller runs amdgpu_device_fini() - confirm.
2134 * Called at driver startup.
2135 */
2136int amdgpu_device_init(struct amdgpu_device *adev,
2137                       struct drm_device *ddev,
2138                       struct pci_dev *pdev,
2139                       uint32_t flags)
2140{
2141        int r, i;
2142        bool runtime = false;
2143        u32 max_MBps;
2144
2145        adev->shutdown = false;
2146        adev->dev = &pdev->dev;
2147        adev->ddev = ddev;
2148        adev->pdev = pdev;
2149        adev->flags = flags;
2150        adev->asic_type = flags & AMD_ASIC_MASK;
2151        adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2152        if (amdgpu_emu_mode == 1)
2153                adev->usec_timeout *= 2;
2154        adev->gmc.gart_size = 512 * 1024 * 1024;
2155        adev->accel_working = false;
2156        adev->num_rings = 0;
2157        adev->mman.buffer_funcs = NULL;
2158        adev->mman.buffer_funcs_ring = NULL;
2159        adev->vm_manager.vm_pte_funcs = NULL;
2160        adev->vm_manager.vm_pte_num_rings = 0;
2161        adev->gmc.gmc_funcs = NULL;
2162        adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2163        bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2164
2165        adev->smc_rreg = &amdgpu_invalid_rreg;
2166        adev->smc_wreg = &amdgpu_invalid_wreg;
2167        adev->pcie_rreg = &amdgpu_invalid_rreg;
2168        adev->pcie_wreg = &amdgpu_invalid_wreg;
2169        adev->pciep_rreg = &amdgpu_invalid_rreg;
2170        adev->pciep_wreg = &amdgpu_invalid_wreg;
2171        adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2172        adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2173        adev->didt_rreg = &amdgpu_invalid_rreg;
2174        adev->didt_wreg = &amdgpu_invalid_wreg;
2175        adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2176        adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2177        adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2178        adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2179
2180        DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2181                 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2182                 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2183
2184        /* all mutex initialization is done here so we
2185         * can re-call this function without having locking issues */
2186        atomic_set(&adev->irq.ih.lock, 0);
2187        mutex_init(&adev->firmware.mutex);
2188        mutex_init(&adev->pm.mutex);
2189        mutex_init(&adev->gfx.gpu_clock_mutex);
2190        mutex_init(&adev->srbm_mutex);
2191        mutex_init(&adev->gfx.pipe_reserve_mutex);
2192        mutex_init(&adev->grbm_idx_mutex);
2193        mutex_init(&adev->mn_lock);
2194        mutex_init(&adev->virt.vf_errors.lock);
2195        hash_init(adev->mn_hash);
2196        mutex_init(&adev->lock_reset);
2197
2198        amdgpu_device_check_arguments(adev);
2199
2200        spin_lock_init(&adev->mmio_idx_lock);
2201        spin_lock_init(&adev->smc_idx_lock);
2202        spin_lock_init(&adev->pcie_idx_lock);
2203        spin_lock_init(&adev->uvd_ctx_idx_lock);
2204        spin_lock_init(&adev->didt_idx_lock);
2205        spin_lock_init(&adev->gc_cac_idx_lock);
2206        spin_lock_init(&adev->se_cac_idx_lock);
2207        spin_lock_init(&adev->audio_endpt_idx_lock);
2208        spin_lock_init(&adev->mm_stats.lock);
2209
2210        INIT_LIST_HEAD(&adev->shadow_list);
2211        mutex_init(&adev->shadow_list_lock);
2212
2213        INIT_LIST_HEAD(&adev->ring_lru_list);
2214        spin_lock_init(&adev->ring_lru_list_lock);
2215
2216        INIT_DELAYED_WORK(&adev->late_init_work,
2217                          amdgpu_device_ip_late_init_func_handler);
2218
2219        /* Registers mapping: BAR 5 on CHIP_BONAIRE and newer, BAR 2 otherwise */
2220        /* TODO: block userspace mapping of io register */
2221        if (adev->asic_type >= CHIP_BONAIRE) {
2222                adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2223                adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2224        } else {
2225                adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2226                adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2227        }
2228
2229        adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2230        if (adev->rmmio == NULL) {
2231                return -ENOMEM;
2232        }
2233        DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2234        DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2235
2236        /* doorbell bar mapping */
2237        amdgpu_device_doorbell_init(adev);
2238
2239        /* io port mapping: use the first BAR flagged IORESOURCE_IO, if any */
2240        for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2241                if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2242                        adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2243                        adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2244                        break;
2245                }
2246        }
2247        if (adev->rio_mem == NULL)
2248                DRM_INFO("PCI I/O BAR is not found.\n");
2249
2250        amdgpu_device_get_pcie_info(adev);
2251
2252        /* early init functions */
2253        r = amdgpu_device_ip_early_init(adev);
2254        if (r)
2255                return r;
2256
2257        /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2258        /* this will fail for cards that aren't VGA class devices, just
2259         * ignore it */
2260        vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
2261
2262        if (amdgpu_device_is_px(ddev))
2263                runtime = true;
2264        if (!pci_is_thunderbolt_attached(adev->pdev))
2265                vga_switcheroo_register_client(adev->pdev,
2266                                               &amdgpu_switcheroo_ops, runtime);
2267        if (runtime)
2268                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2269
2270        if (amdgpu_emu_mode == 1) {
2271                /* post the asic on emulation mode; the VBIOS steps below are skipped */
2272                emu_soc_asic_init(adev);
2273                goto fence_driver_init;
2274        }
2275
2276        /* Read BIOS */
2277        if (!amdgpu_get_bios(adev)) {
2278                r = -EINVAL;
2279                goto failed;
2280        }
2281
2282        r = amdgpu_atombios_init(adev);
2283        if (r) {
2284                dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2285                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2286                goto failed;
2287        }
2288
2289        /* detect if we are with an SRIOV vbios */
2290        amdgpu_device_detect_sriov_bios(adev);
2291
2292        /* Post card if necessary */
2293        if (amdgpu_device_need_post(adev)) {
2294                if (!adev->bios) {
2295                        dev_err(adev->dev, "no vBIOS found\n");
2296                        r = -EINVAL;
2297                        goto failed;
2298                }
2299                DRM_INFO("GPU posting now...\n");
2300                r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2301                if (r) {
2302                        dev_err(adev->dev, "gpu post error!\n");
2303                        goto failed;
2304                }
2305        }
2306
2307        if (adev->is_atom_fw) {
2308                /* Initialize clocks */
2309                r = amdgpu_atomfirmware_get_clock_info(adev);
2310                if (r) {
2311                        dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
2312                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2313                        goto failed;
2314                }
2315        } else {
2316                /* Initialize clocks */
2317                r = amdgpu_atombios_get_clock_info(adev);
2318                if (r) {
2319                        dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
2320                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2321                        goto failed;
2322                }
2323                /* init i2c buses (only when DC is not handling the display) */
2324                if (!amdgpu_device_has_dc_support(adev))
2325                        amdgpu_atombios_i2c_init(adev);
2326        }
2327
2328fence_driver_init:
2329        /* Fence driver */
2330        r = amdgpu_fence_driver_init(adev);
2331        if (r) {
2332                dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
2333                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
2334                goto failed;
2335        }
2336
2337        /* init the mode config */
2338        drm_mode_config_init(adev->ddev);
2339
2340        r = amdgpu_device_ip_init(adev);
2341        if (r) {
2342                /* failed in exclusive mode due to timeout */
2343                if (amdgpu_sriov_vf(adev) &&
2344                    !amdgpu_sriov_runtime(adev) &&
2345                    amdgpu_virt_mmio_blocked(adev) &&
2346                    !amdgpu_virt_wait_reset(adev)) {
2347                        dev_err(adev->dev, "VF exclusive mode timeout\n");
2348                        /* Don't send request since VF is inactive. */
2349                        adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2350                        adev->virt.ops = NULL;
2351                        r = -EAGAIN;
2352                        goto failed;
2353                }
2354                dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
2355                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
2356                goto failed;
2357        }
2358
2359        adev->accel_working = true;
2360
2361        amdgpu_vm_check_compute_bug(adev);
2362
2363        /* Initialize the buffer migration limit; a negative amdgpu_moverate selects the default. */
2364        if (amdgpu_moverate >= 0)
2365                max_MBps = amdgpu_moverate;
2366        else
2367                max_MBps = 8; /* Allow 8 MB/s. */
2368        /* Get a log2 for easy divisions; clamp to 1 so ilog2() is never given 0. */
2369        adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2370
2371        r = amdgpu_ib_pool_init(adev);
2372        if (r) {
2373                dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2374                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2375                goto failed;
2376        }
2377
2378        r = amdgpu_ib_ring_tests(adev);
2379        if (r)
2380                DRM_ERROR("ib ring test failed (%d).\n", r);
2381
2382        if (amdgpu_sriov_vf(adev))
2383                amdgpu_virt_init_data_exchange(adev);
2384
2385        amdgpu_fbdev_init(adev);
2386
2387        r = amdgpu_pm_sysfs_init(adev);
2388        if (r)
2389                DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2390
2391        r = amdgpu_debugfs_gem_init(adev);
2392        if (r)
2393                DRM_ERROR("registering gem debugfs failed (%d).\n", r);
2394
2395        r = amdgpu_debugfs_regs_init(adev);
2396        if (r)
2397                DRM_ERROR("registering register debugfs failed (%d).\n", r);
2398
2399        r = amdgpu_debugfs_firmware_init(adev);
2400        if (r)
2401                DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
2402
2403        r = amdgpu_debugfs_init(adev);
2404        if (r)
2405                DRM_ERROR("Creating debugfs files failed (%d).\n", r);
2406
2407        if ((amdgpu_testing & 1)) {
2408                if (adev->accel_working)
2409                        amdgpu_test_moves(adev);
2410                else
2411                        DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2412        }
2413        if (amdgpu_benchmarking) {
2414                if (adev->accel_working)
2415                        amdgpu_benchmark(adev, amdgpu_benchmarking);
2416                else
2417                        DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2418        }
2419
2420        /* enable clockgating, etc. after ib tests, etc. since some blocks require
2421         * explicit gating rather than handling it automatically.
2422         */
2423        r = amdgpu_device_ip_late_init(adev);
2424        if (r) {
2425                dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
2426                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
2427                goto failed;
2428        }
2429
2430        return 0;
2431
2432failed:
2433        amdgpu_vf_error_trans_all(adev);
2434        if (runtime)
2435                vga_switcheroo_fini_domain_pm_ops(adev->dev);
2436
2437        return r;
2438}
2439
2440/**
2441 * amdgpu_device_fini - tear down the driver
2442 *
2443 * @adev: amdgpu_device pointer
2444 *
2445 * Tear down the driver info (all asics).
2446 * Called at driver shutdown.
2447 */
2448void amdgpu_device_fini(struct amdgpu_device *adev)
2449{
2450        int r;
2451
2452        DRM_INFO("amdgpu: finishing device.\n");
2453        adev->shutdown = true;
2454        /* disable all interrupts */
2455        amdgpu_irq_disable_all(adev);
2456        if (adev->mode_info.mode_config_initialized){
2457                if (!amdgpu_device_has_dc_support(adev))
2458                        drm_crtc_force_disable_all(adev->ddev);
2459                else
2460                        drm_atomic_helper_shutdown(adev->ddev);
2461        }
2462        amdgpu_ib_pool_fini(adev);
2463        amdgpu_fence_driver_fini(adev);
2464        amdgpu_pm_sysfs_fini(adev);
2465        amdgpu_fbdev_fini(adev);
2466        r = amdgpu_device_ip_fini(adev);
2467        if (adev->firmware.gpu_info_fw) {
2468                release_firmware(adev->firmware.gpu_info_fw);
2469                adev->firmware.gpu_info_fw = NULL;
2470        }
2471        adev->accel_working = false;
2472        cancel_delayed_work_sync(&adev->late_init_work);
2473        /* free i2c buses */
2474        if (!amdgpu_device_has_dc_support(adev))
2475                amdgpu_i2c_fini(adev);
2476
2477        if (amdgpu_emu_mode != 1)
2478                amdgpu_atombios_fini(adev);
2479
2480        kfree(adev->bios);
2481        adev->bios = NULL;
2482        if (!pci_is_thunderbolt_attached(adev->pdev))
2483                vga_switcheroo_unregister_client(adev->pdev);
2484        if (adev->flags & AMD_IS_PX)
2485                vga_switcheroo_fini_domain_pm_ops(adev->dev);
2486        vga_client_register(adev->pdev, NULL, NULL, NULL);
2487        if (adev->rio_mem)
2488                pci_iounmap(adev->pdev, adev->rio_mem);
2489        adev->rio_mem = NULL;
2490        iounmap(adev->rmmio);
2491        adev->rmmio = NULL;
2492        amdgpu_device_doorbell_fini(adev);
2493        amdgpu_debugfs_regs_cleanup(adev);
2494}
2495
2496
2497/*
2498 * Suspend & resume.
2499 */
2500/**
2501 * amdgpu_device_suspend - initiate device suspend
2502 *
2503 * @pdev: drm dev pointer
2504 * @state: suspend state
2505 *
2506 * Puts the hw in the suspend state (all asics).
2507 * Returns 0 for success or an error on failure.
2508 * Called at driver suspend.
2509 */
2510int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
2511{
2512        struct amdgpu_device *adev;
2513        struct drm_crtc *crtc;
2514        struct drm_connector *connector;
2515        int r;
2516
2517        if (dev == NULL || dev->dev_private == NULL) {
2518                return -ENODEV;
2519        }
2520
2521        adev = dev->dev_private;
2522
2523        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2524                return 0;
2525
2526        drm_kms_helper_poll_disable(dev);
2527
2528        if (!amdgpu_device_has_dc_support(adev)) {
2529                /* turn off display hw */
2530                drm_modeset_lock_all(dev);
2531                list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2532                        drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2533                }
2534                drm_modeset_unlock_all(dev);
2535        }
2536
2537        amdgpu_amdkfd_suspend(adev);
2538
2539        /* unpin the front buffers and cursors */
2540        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2541                struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2542                struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb);
2543                struct amdgpu_bo *robj;
2544
2545                if (amdgpu_crtc->cursor_bo) {
2546                        struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2547                        r = amdgpu_bo_reserve(aobj, true);
2548                        if (r == 0) {
2549                                amdgpu_bo_unpin(aobj);
2550                                amdgpu_bo_unreserve(aobj);
2551                        }
2552                }
2553
2554                if (rfb == NULL || rfb->obj == NULL) {
2555                        continue;
2556                }
2557                robj = gem_to_amdgpu_bo(rfb->obj);
2558                /* don't unpin kernel fb objects */
2559                if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2560                        r = amdgpu_bo_reserve(robj, true);
2561                        if (r == 0) {
2562                                amdgpu_bo_unpin(robj);
2563                                amdgpu_bo_unreserve(robj);
2564                        }
2565                }
2566        }
2567        /* evict vram memory */
2568        amdgpu_bo_evict_vram(adev);
2569
2570        amdgpu_fence_driver_suspend(adev);
2571
2572        r = amdgpu_device_ip_suspend(adev);
2573
2574        /* evict remaining vram memory
2575         * This second call to evict vram is to evict the gart page table
2576         * using the CPU.
2577         */
2578        amdgpu_bo_evict_vram(adev);
2579
2580        pci_save_state(dev->pdev);
2581        if (suspend) {
2582                /* Shut down the device */
2583                pci_disable_device(dev->pdev);
2584                pci_set_power_state(dev->pdev, PCI_D3hot);
2585        } else {
2586                r = amdgpu_asic_reset(adev);
2587                if (r)
2588                        DRM_ERROR("amdgpu asic reset failed\n");
2589        }
2590
2591        if (fbcon) {
2592                console_lock();
2593                amdgpu_fbdev_set_suspend(adev, 1);
2594                console_unlock();
2595        }
2596        return 0;
2597}
2598
2599/**
2600 * amdgpu_device_resume - initiate device resume
2601 *
2602 * @pdev: drm dev pointer
2603 *
2604 * Bring the hw back to operating state (all asics).
2605 * Returns 0 for success or an error on failure.
2606 * Called at driver resume.
2607 */
2608int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
2609{
2610        struct drm_connector *connector;
2611        struct amdgpu_device *adev = dev->dev_private;
2612        struct drm_crtc *crtc;
2613        int r = 0;
2614
2615        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2616                return 0;
2617
2618        if (fbcon)
2619                console_lock();
2620
2621        if (resume) {
2622                pci_set_power_state(dev->pdev, PCI_D0);
2623                pci_restore_state(dev->pdev);
2624                r = pci_enable_device(dev->pdev);
2625                if (r)
2626                        goto unlock;
2627        }
2628
2629        /* post card */
2630        if (amdgpu_device_need_post(adev)) {
2631                r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2632                if (r)
2633                        DRM_ERROR("amdgpu asic init failed\n");
2634        }
2635
2636        r = amdgpu_device_ip_resume(adev);
2637        if (r) {
2638                DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
2639                goto unlock;
2640        }
2641        amdgpu_fence_driver_resume(adev);
2642
2643        if (resume) {
2644                r = amdgpu_ib_ring_tests(adev);
2645                if (r)
2646                        DRM_ERROR("ib ring test failed (%d).\n", r);
2647        }
2648
2649        r = amdgpu_device_ip_late_init(adev);
2650        if (r)
2651                goto unlock;
2652
2653        /* pin cursors */
2654        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2655                struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2656
2657                if (amdgpu_crtc->cursor_bo) {
2658                        struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2659                        r = amdgpu_bo_reserve(aobj, true);
2660                        if (r == 0) {
2661                                r = amdgpu_bo_pin(aobj,
2662                                                  AMDGPU_GEM_DOMAIN_VRAM,
2663                                                  &amdgpu_crtc->cursor_addr);
2664                                if (r != 0)
2665                                        DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2666                                amdgpu_bo_unreserve(aobj);
2667                        }
2668                }
2669        }
2670        r = amdgpu_amdkfd_resume(adev);
2671        if (r)
2672                return r;
2673
2674        /* blat the mode back in */
2675        if (fbcon) {
2676                if (!amdgpu_device_has_dc_support(adev)) {
2677                        /* pre DCE11 */
2678                        drm_helper_resume_force_mode(dev);
2679
2680                        /* turn on display hw */
2681                        drm_modeset_lock_all(dev);
2682                        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2683                                drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
2684                        }
2685                        drm_modeset_unlock_all(dev);
2686                }
2687        }
2688
2689        drm_kms_helper_poll_enable(dev);
2690
2691        /*
2692         * Most of the connector probing functions try to acquire runtime pm
2693         * refs to ensure that the GPU is powered on when connector polling is
2694         * performed. Since we're calling this from a runtime PM callback,
2695         * trying to acquire rpm refs will cause us to deadlock.
2696         *
2697         * Since we're guaranteed to be holding the rpm lock, it's safe to
2698         * temporarily disable the rpm helpers so this doesn't deadlock us.
2699         */
2700#ifdef CONFIG_PM
2701        dev->dev->power.disable_depth++;
2702#endif
2703        if (!amdgpu_device_has_dc_support(adev))
2704                drm_helper_hpd_irq_event(dev);
2705        else
2706                drm_kms_helper_hotplug_event(dev);
2707#ifdef CONFIG_PM
2708        dev->dev->power.disable_depth--;
2709#endif
2710
2711        if (fbcon)
2712                amdgpu_fbdev_set_suspend(adev, 0);
2713
2714unlock:
2715        if (fbcon)
2716                console_unlock();
2717
2718        return r;
2719}
2720
2721/**
2722 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2723 *
2724 * @adev: amdgpu_device pointer
2725 *
2726 * The list of all the hardware IPs that make up the asic is walked and
2727 * the check_soft_reset callbacks are run.  check_soft_reset determines
2728 * if the asic is still hung or not.
2729 * Returns true if any of the IPs are still in a hung state, false if not.
2730 */
2731static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
2732{
2733        int i;
2734        bool asic_hang = false;
2735
2736        if (amdgpu_sriov_vf(adev))
2737                return true;
2738
2739        for (i = 0; i < adev->num_ip_blocks; i++) {
2740                if (!adev->ip_blocks[i].status.valid)
2741                        continue;
2742                if (adev->ip_blocks[i].version->funcs->check_soft_reset)
2743                        adev->ip_blocks[i].status.hang =
2744                                adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
2745                if (adev->ip_blocks[i].status.hang) {
2746                        DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
2747                        asic_hang = true;
2748                }
2749        }
2750        return asic_hang;
2751}
2752
2753/**
2754 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
2755 *
2756 * @adev: amdgpu_device pointer
2757 *
2758 * The list of all the hardware IPs that make up the asic is walked and the
2759 * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
2760 * handles any IP specific hardware or software state changes that are
2761 * necessary for a soft reset to succeed.
2762 * Returns 0 on success, negative error code on failure.
2763 */
2764static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
2765{
2766        int i, r = 0;
2767
2768        for (i = 0; i < adev->num_ip_blocks; i++) {
2769                if (!adev->ip_blocks[i].status.valid)
2770                        continue;
2771                if (adev->ip_blocks[i].status.hang &&
2772                    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
2773                        r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
2774                        if (r)
2775                                return r;
2776                }
2777        }
2778
2779        return 0;
2780}
2781
2782/**
2783 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
2784 *
2785 * @adev: amdgpu_device pointer
2786 *
2787 * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
2788 * reset is necessary to recover.
2789 * Returns true if a full asic reset is required, false if not.
2790 */
2791static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
2792{
2793        int i;
2794
2795        for (i = 0; i < adev->num_ip_blocks; i++) {
2796                if (!adev->ip_blocks[i].status.valid)
2797                        continue;
2798                if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
2799                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
2800                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
2801                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
2802                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2803                        if (adev->ip_blocks[i].status.hang) {
2804                                DRM_INFO("Some block need full reset!\n");
2805                                return true;
2806                        }
2807                }
2808        }
2809        return false;
2810}
2811
2812/**
2813 * amdgpu_device_ip_soft_reset - do a soft reset
2814 *
2815 * @adev: amdgpu_device pointer
2816 *
2817 * The list of all the hardware IPs that make up the asic is walked and the
2818 * soft_reset callbacks are run if the block is hung.  soft_reset handles any
2819 * IP specific hardware or software state changes that are necessary to soft
2820 * reset the IP.
2821 * Returns 0 on success, negative error code on failure.
2822 */
2823static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
2824{
2825        int i, r = 0;
2826
2827        for (i = 0; i < adev->num_ip_blocks; i++) {
2828                if (!adev->ip_blocks[i].status.valid)
2829                        continue;
2830                if (adev->ip_blocks[i].status.hang &&
2831                    adev->ip_blocks[i].version->funcs->soft_reset) {
2832                        r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
2833                        if (r)
2834                                return r;
2835                }
2836        }
2837
2838        return 0;
2839}
2840
2841/**
2842 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
2843 *
2844 * @adev: amdgpu_device pointer
2845 *
2846 * The list of all the hardware IPs that make up the asic is walked and the
2847 * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
2848 * handles any IP specific hardware or software state changes that are
2849 * necessary after the IP has been soft reset.
2850 * Returns 0 on success, negative error code on failure.
2851 */
2852static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
2853{
2854        int i, r = 0;
2855
2856        for (i = 0; i < adev->num_ip_blocks; i++) {
2857                if (!adev->ip_blocks[i].status.valid)
2858                        continue;
2859                if (adev->ip_blocks[i].status.hang &&
2860                    adev->ip_blocks[i].version->funcs->post_soft_reset)
2861                        r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
2862                if (r)
2863                        return r;
2864        }
2865
2866        return 0;
2867}
2868
2869/**
2870 * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
2871 *
2872 * @adev: amdgpu_device pointer
2873 * @ring: amdgpu_ring for the engine handling the buffer operations
2874 * @bo: amdgpu_bo buffer whose shadow is being restored
2875 * @fence: dma_fence associated with the operation
2876 *
2877 * Restores the VRAM buffer contents from the shadow in GTT.  Used to
2878 * restore things like GPUVM page tables after a GPU reset where
2879 * the contents of VRAM might be lost.
2880 * Returns 0 on success, negative error code on failure.
2881 */
2882static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
2883                                                  struct amdgpu_ring *ring,
2884                                                  struct amdgpu_bo *bo,
2885                                                  struct dma_fence **fence)
2886{
2887        uint32_t domain;
2888        int r;
2889
2890        if (!bo->shadow)
2891                return 0;
2892
2893        r = amdgpu_bo_reserve(bo, true);
2894        if (r)
2895                return r;
2896        domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
2897        /* if bo has been evicted, then no need to recover */
2898        if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
2899                r = amdgpu_bo_validate(bo->shadow);
2900                if (r) {
2901                        DRM_ERROR("bo validate failed!\n");
2902                        goto err;
2903                }
2904
2905                r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
2906                                                 NULL, fence, true);
2907                if (r) {
2908                        DRM_ERROR("recover page table failed!\n");
2909                        goto err;
2910                }
2911        }
2912err:
2913        amdgpu_bo_unreserve(bo);
2914        return r;
2915}
2916
2917/**
2918 * amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents
2919 *
2920 * @adev: amdgpu_device pointer
2921 *
2922 * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
2923 * restore things like GPUVM page tables after a GPU reset where
2924 * the contents of VRAM might be lost.
2925 * Returns 0 on success, 1 on failure.
2926 */
2927static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
2928{
2929        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
2930        struct amdgpu_bo *bo, *tmp;
2931        struct dma_fence *fence = NULL, *next = NULL;
2932        long r = 1;
2933        int i = 0;
2934        long tmo;
2935
2936        if (amdgpu_sriov_runtime(adev))
2937                tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
2938        else
2939                tmo = msecs_to_jiffies(100);
2940
2941        DRM_INFO("recover vram bo from shadow start\n");
2942        mutex_lock(&adev->shadow_list_lock);
2943        list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
2944                next = NULL;
2945                amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
2946                if (fence) {
2947                        r = dma_fence_wait_timeout(fence, false, tmo);
2948                        if (r == 0)
2949                                pr_err("wait fence %p[%d] timeout\n", fence, i);
2950                        else if (r < 0)
2951                                pr_err("wait fence %p[%d] interrupted\n", fence, i);
2952                        if (r < 1) {
2953                                dma_fence_put(fence);
2954                                fence = next;
2955                                break;
2956                        }
2957                        i++;
2958                }
2959
2960                dma_fence_put(fence);
2961                fence = next;
2962        }
2963        mutex_unlock(&adev->shadow_list_lock);
2964
2965        if (fence) {
2966                r = dma_fence_wait_timeout(fence, false, tmo);
2967                if (r == 0)
2968                        pr_err("wait fence %p[%d] timeout\n", fence, i);
2969                else if (r < 0)
2970                        pr_err("wait fence %p[%d] interrupted\n", fence, i);
2971
2972        }
2973        dma_fence_put(fence);
2974
2975        if (r > 0)
2976                DRM_INFO("recover vram bo from shadow done\n");
2977        else
2978                DRM_ERROR("recover vram bo from shadow failed\n");
2979
2980        return (r > 0) ? 0 : 1;
2981}
2982
2983/**
2984 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
2985 *
2986 * @adev: amdgpu device pointer
2987 *
2988 * attempt to do soft-reset or full-reset and reinitialize Asic
2989 * return 0 means successed otherwise failed
2990 */
2991static int amdgpu_device_reset(struct amdgpu_device *adev)
2992{
2993        bool need_full_reset, vram_lost = 0;
2994        int r;
2995
2996        need_full_reset = amdgpu_device_ip_need_full_reset(adev);
2997
2998        if (!need_full_reset) {
2999                amdgpu_device_ip_pre_soft_reset(adev);
3000                r = amdgpu_device_ip_soft_reset(adev);
3001                amdgpu_device_ip_post_soft_reset(adev);
3002                if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3003                        DRM_INFO("soft reset failed, will fallback to full reset!\n");
3004                        need_full_reset = true;
3005                }
3006        }
3007
3008        if (need_full_reset) {
3009                r = amdgpu_device_ip_suspend(adev);
3010
3011retry:
3012                r = amdgpu_asic_reset(adev);
3013                /* post card */
3014                amdgpu_atom_asic_init(adev->mode_info.atom_context);
3015
3016                if (!r) {
3017                        dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
3018                        r = amdgpu_device_ip_resume_phase1(adev);
3019                        if (r)
3020                                goto out;
3021
3022                        vram_lost = amdgpu_device_check_vram_lost(adev);
3023                        if (vram_lost) {
3024                                DRM_ERROR("VRAM is lost!\n");
3025                                atomic_inc(&adev->vram_lost_counter);
3026                        }
3027
3028                        r = amdgpu_gtt_mgr_recover(
3029                                &adev->mman.bdev.man[TTM_PL_TT]);
3030                        if (r)
3031                                goto out;
3032
3033                        r = amdgpu_device_ip_resume_phase2(adev);
3034                        if (r)
3035                                goto out;
3036
3037                        if (vram_lost)
3038                                amdgpu_device_fill_reset_magic(adev);
3039                }
3040        }
3041
3042out:
3043        if (!r) {
3044                amdgpu_irq_gpu_reset_resume_helper(adev);
3045                r = amdgpu_ib_ring_tests(adev);
3046                if (r) {
3047                        dev_err(adev->dev, "ib ring test failed (%d).\n", r);
3048                        r = amdgpu_device_ip_suspend(adev);
3049                        need_full_reset = true;
3050                        goto retry;
3051                }
3052        }
3053
3054        if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
3055                r = amdgpu_device_handle_vram_lost(adev);
3056
3057        return r;
3058}
3059
3060/**
3061 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3062 *
3063 * @adev: amdgpu device pointer
3064 *
3065 * do VF FLR and reinitialize Asic
3066 * return 0 means successed otherwise failed
3067 */
3068static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3069                                     bool from_hypervisor)
3070{
3071        int r;
3072
3073        if (from_hypervisor)
3074                r = amdgpu_virt_request_full_gpu(adev, true);
3075        else
3076                r = amdgpu_virt_reset_gpu(adev);
3077        if (r)
3078                return r;
3079
3080        /* Resume IP prior to SMC */
3081        r = amdgpu_device_ip_reinit_early_sriov(adev);
3082        if (r)
3083                goto error;
3084
3085        /* we need recover gart prior to run SMC/CP/SDMA resume */
3086        amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3087
3088        /* now we are okay to resume SMC/CP/SDMA */
3089        r = amdgpu_device_ip_reinit_late_sriov(adev);
3090        amdgpu_virt_release_full_gpu(adev, true);
3091        if (r)
3092                goto error;
3093
3094        amdgpu_irq_gpu_reset_resume_helper(adev);
3095        r = amdgpu_ib_ring_tests(adev);
3096
3097        if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3098                atomic_inc(&adev->vram_lost_counter);
3099                r = amdgpu_device_handle_vram_lost(adev);
3100        }
3101
3102error:
3103
3104        return r;
3105}
3106
3107/**
3108 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3109 *
3110 * @adev: amdgpu device pointer
3111 * @job: which job trigger hang
3112 * @force forces reset regardless of amdgpu_gpu_recovery
3113 *
3114 * Attempt to reset the GPU if it has hung (all asics).
3115 * Returns 0 for success or an error on failure.
3116 */
3117int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3118                              struct amdgpu_job *job, bool force)
3119{
3120        struct drm_atomic_state *state = NULL;
3121        int i, r, resched;
3122
3123        if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
3124                DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
3125                return 0;
3126        }
3127
3128        if (!force && (amdgpu_gpu_recovery == 0 ||
3129                        (amdgpu_gpu_recovery == -1  && !amdgpu_sriov_vf(adev)))) {
3130                DRM_INFO("GPU recovery disabled.\n");
3131                return 0;
3132        }
3133
3134        dev_info(adev->dev, "GPU reset begin!\n");
3135
3136        mutex_lock(&adev->lock_reset);
3137        atomic_inc(&adev->gpu_reset_counter);
3138        adev->in_gpu_reset = 1;
3139
3140        /* block TTM */
3141        resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
3142
3143        /* store modesetting */
3144        if (amdgpu_device_has_dc_support(adev))
3145                state = drm_atomic_helper_suspend(adev->ddev);
3146
3147        /* block all schedulers and reset given job's ring */
3148        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3149                struct amdgpu_ring *ring = adev->rings[i];
3150
3151                if (!ring || !ring->sched.thread)
3152                        continue;
3153
3154                kthread_park(ring->sched.thread);
3155
3156                if (job && job->ring->idx != i)
3157                        continue;
3158
3159                drm_sched_hw_job_reset(&ring->sched, &job->base);
3160
3161                /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3162                amdgpu_fence_driver_force_completion(ring);
3163        }
3164
3165        if (amdgpu_sriov_vf(adev))
3166                r = amdgpu_device_reset_sriov(adev, job ? false : true);
3167        else
3168                r = amdgpu_device_reset(adev);
3169
3170        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3171                struct amdgpu_ring *ring = adev->rings[i];
3172
3173                if (!ring || !ring->sched.thread)
3174                        continue;
3175
3176                /* only need recovery sched of the given job's ring
3177                 * or all rings (in the case @job is NULL)
3178                 * after above amdgpu_reset accomplished
3179                 */
3180                if ((!job || job->ring->idx == i) && !r)
3181                        drm_sched_job_recovery(&ring->sched);
3182
3183                kthread_unpark(ring->sched.thread);
3184        }
3185
3186        if (amdgpu_device_has_dc_support(adev)) {
3187                if (drm_atomic_helper_resume(adev->ddev, state))
3188                        dev_info(adev->dev, "drm resume failed:%d\n", r);
3189        } else {
3190                drm_helper_resume_force_mode(adev->ddev);
3191        }
3192
3193        ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
3194
3195        if (r) {
3196                /* bad news, how to tell it to userspace ? */
3197                dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3198                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3199        } else {
3200                dev_info(adev->dev, "GPU reset(%d) successed!\n",atomic_read(&adev->gpu_reset_counter));
3201        }
3202
3203        amdgpu_vf_error_trans_all(adev);
3204        adev->in_gpu_reset = 0;
3205        mutex_unlock(&adev->lock_reset);
3206        return r;
3207}
3208
3209/**
3210 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
3211 *
3212 * @adev: amdgpu_device pointer
3213 *
3214 * Fetchs and stores in the driver the PCIE capabilities (gen speed
3215 * and lanes) of the slot the device is in. Handles APUs and
3216 * virtualized environments where PCIE config space may not be available.
3217 */
3218static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
3219{
3220        u32 mask;
3221        int ret;
3222
3223        if (amdgpu_pcie_gen_cap)
3224                adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
3225
3226        if (amdgpu_pcie_lane_cap)
3227                adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
3228
3229        /* covers APUs as well */
3230        if (pci_is_root_bus(adev->pdev->bus)) {
3231                if (adev->pm.pcie_gen_mask == 0)
3232                        adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3233                if (adev->pm.pcie_mlw_mask == 0)
3234                        adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
3235                return;
3236        }
3237
3238        if (adev->pm.pcie_gen_mask == 0) {
3239                ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
3240                if (!ret) {
3241                        adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3242                                                  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3243                                                  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3244
3245                        if (mask & DRM_PCIE_SPEED_25)
3246                                adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3247                        if (mask & DRM_PCIE_SPEED_50)
3248                                adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
3249                        if (mask & DRM_PCIE_SPEED_80)
3250                                adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
3251                } else {
3252                        adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3253                }
3254        }
3255        if (adev->pm.pcie_mlw_mask == 0) {
3256                ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
3257                if (!ret) {
3258                        switch (mask) {
3259                        case 32:
3260                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3261                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3262                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3263                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3264                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3265                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3266                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3267                                break;
3268                        case 16:
3269                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3270                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3271                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3272                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3273                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3274                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3275                                break;
3276                        case 12:
3277                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3278                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3279                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3280                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3281                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3282                                break;
3283                        case 8:
3284                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3285                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3286                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3287                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3288                                break;
3289                        case 4:
3290                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3291                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3292                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3293                                break;
3294                        case 2:
3295                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3296                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3297                                break;
3298                        case 1:
3299                                adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3300                                break;
3301                        default:
3302                                break;
3303                        }
3304                } else {
3305                        adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
3306                }
3307        }
3308}
3309
3310