linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <linux/power_supply.h>
  29#include <linux/kthread.h>
  30#include <linux/module.h>
  31#include <linux/console.h>
  32#include <linux/slab.h>
  33
  34#include <drm/drm_atomic_helper.h>
  35#include <drm/drm_probe_helper.h>
  36#include <drm/amdgpu_drm.h>
  37#include <linux/vgaarb.h>
  38#include <linux/vga_switcheroo.h>
  39#include <linux/efi.h>
  40#include "amdgpu.h"
  41#include "amdgpu_trace.h"
  42#include "amdgpu_i2c.h"
  43#include "atom.h"
  44#include "amdgpu_atombios.h"
  45#include "amdgpu_atomfirmware.h"
  46#include "amd_pcie.h"
  47#ifdef CONFIG_DRM_AMDGPU_SI
  48#include "si.h"
  49#endif
  50#ifdef CONFIG_DRM_AMDGPU_CIK
  51#include "cik.h"
  52#endif
  53#include "vi.h"
  54#include "soc15.h"
  55#include "nv.h"
  56#include "bif/bif_4_1_d.h"
  57#include <linux/pci.h>
  58#include <linux/firmware.h>
  59#include "amdgpu_vf_error.h"
  60
  61#include "amdgpu_amdkfd.h"
  62#include "amdgpu_pm.h"
  63
  64#include "amdgpu_xgmi.h"
  65#include "amdgpu_ras.h"
  66#include "amdgpu_pmu.h"
  67#include "amdgpu_fru_eeprom.h"
  68
  69#include <linux/suspend.h>
  70#include <drm/task_barrier.h>
  71#include <linux/pm_runtime.h>
  72
  73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
  77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
  78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
  79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
  80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
  81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
  82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
  83
  84#define AMDGPU_RESUME_MS                2000
  85
  86const char *amdgpu_asic_name[] = {
  87        "TAHITI",
  88        "PITCAIRN",
  89        "VERDE",
  90        "OLAND",
  91        "HAINAN",
  92        "BONAIRE",
  93        "KAVERI",
  94        "KABINI",
  95        "HAWAII",
  96        "MULLINS",
  97        "TOPAZ",
  98        "TONGA",
  99        "FIJI",
 100        "CARRIZO",
 101        "STONEY",
 102        "POLARIS10",
 103        "POLARIS11",
 104        "POLARIS12",
 105        "VEGAM",
 106        "VEGA10",
 107        "VEGA12",
 108        "VEGA20",
 109        "RAVEN",
 110        "ARCTURUS",
 111        "RENOIR",
 112        "NAVI10",
 113        "NAVI14",
 114        "NAVI12",
 115        "SIENNA_CICHLID",
 116        "NAVY_FLOUNDER",
 117        "LAST",
 118};
 119
 120/**
 121 * DOC: pcie_replay_count
 122 *
 123 * The amdgpu driver provides a sysfs API for reporting the total number
  124 * of PCIe replays (NAKs).
  125 * The file pcie_replay_count is used for this and returns the total
  126 * number of replays as a sum of the NAKs generated and NAKs received.
 127 */
 128
 129static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 130                struct device_attribute *attr, char *buf)
 131{
 132        struct drm_device *ddev = dev_get_drvdata(dev);
 133        struct amdgpu_device *adev = ddev->dev_private;
 134        uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
 135
 136        return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
 137}
 138
 139static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
 140                amdgpu_device_get_pcie_replay_count, NULL);
 141
 142static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 143
 144/**
 145 * DOC: product_name
 146 *
 147 * The amdgpu driver provides a sysfs API for reporting the product name
 148 * for the device
  149 * The file product_name is used for this and returns the product name
 150 * as returned from the FRU.
 151 * NOTE: This is only available for certain server cards
 152 */
 153
 154static ssize_t amdgpu_device_get_product_name(struct device *dev,
 155                struct device_attribute *attr, char *buf)
 156{
 157        struct drm_device *ddev = dev_get_drvdata(dev);
 158        struct amdgpu_device *adev = ddev->dev_private;
 159
 160        return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
 161}
 162
 163static DEVICE_ATTR(product_name, S_IRUGO,
 164                amdgpu_device_get_product_name, NULL);
 165
 166/**
 167 * DOC: product_number
 168 *
 169 * The amdgpu driver provides a sysfs API for reporting the part number
 170 * for the device
  171 * The file product_number is used for this and returns the part number
 172 * as returned from the FRU.
 173 * NOTE: This is only available for certain server cards
 174 */
 175
 176static ssize_t amdgpu_device_get_product_number(struct device *dev,
 177                struct device_attribute *attr, char *buf)
 178{
 179        struct drm_device *ddev = dev_get_drvdata(dev);
 180        struct amdgpu_device *adev = ddev->dev_private;
 181
 182        return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
 183}
 184
 185static DEVICE_ATTR(product_number, S_IRUGO,
 186                amdgpu_device_get_product_number, NULL);
 187
 188/**
 189 * DOC: serial_number
 190 *
 191 * The amdgpu driver provides a sysfs API for reporting the serial number
 192 * for the device
 193 * The file serial_number is used for this and returns the serial number
 194 * as returned from the FRU.
 195 * NOTE: This is only available for certain server cards
 196 */
 197
 198static ssize_t amdgpu_device_get_serial_number(struct device *dev,
 199                struct device_attribute *attr, char *buf)
 200{
 201        struct drm_device *ddev = dev_get_drvdata(dev);
 202        struct amdgpu_device *adev = ddev->dev_private;
 203
 204        return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
 205}
 206
 207static DEVICE_ATTR(serial_number, S_IRUGO,
 208                amdgpu_device_get_serial_number, NULL);
 209
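/*
 * Example (illustrative sketch, not part of the driver): a userspace C
 * snippet reading one of the sysfs attributes defined above.  The card
 * index ("card0") and therefore the exact path are assumptions; they
 * depend on how DRM enumerates the device on a given system.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char buf[64];
 *		FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *
 *		if (!f)
 *			return 1;
 *		if (fgets(buf, sizeof(buf), f))
 *			printf("PCIe replay count: %s", buf);
 *		fclose(f);
 *		return 0;
 *	}
 *
 * product_name, product_number and serial_number can be read the same way.
 */
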
 210/**
 211 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 212 *
 213 * @dev: drm_device pointer
 214 *
 215 * Returns true if the device is a dGPU with HG/PX power control,
 216 * otherwise return false.
 217 */
 218bool amdgpu_device_supports_boco(struct drm_device *dev)
 219{
 220        struct amdgpu_device *adev = dev->dev_private;
 221
 222        if (adev->flags & AMD_IS_PX)
 223                return true;
 224        return false;
 225}
 226
 227/**
 228 * amdgpu_device_supports_baco - Does the device support BACO
 229 *
 230 * @dev: drm_device pointer
 231 *
  232 * Returns true if the device supports BACO,
 233 * otherwise return false.
 234 */
 235bool amdgpu_device_supports_baco(struct drm_device *dev)
 236{
 237        struct amdgpu_device *adev = dev->dev_private;
 238
 239        return amdgpu_asic_supports_baco(adev);
 240}
 241
 242/**
 243 * VRAM access helper functions.
 244 *
 245 * amdgpu_device_vram_access - read/write a buffer in vram
 246 *
 247 * @adev: amdgpu_device pointer
 248 * @pos: offset of the buffer in vram
 249 * @buf: virtual address of the buffer in system memory
  250 * @size: read/write size; the buffer at @buf must be at least @size bytes
 251 * @write: true - write to vram, otherwise - read from vram
 252 */
 253void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
 254                               uint32_t *buf, size_t size, bool write)
 255{
 256        unsigned long flags;
 257        uint32_t hi = ~0;
 258        uint64_t last;
 259
 260
 261#ifdef CONFIG_64BIT
 262        last = min(pos + size, adev->gmc.visible_vram_size);
 263        if (last > pos) {
 264                void __iomem *addr = adev->mman.aper_base_kaddr + pos;
 265                size_t count = last - pos;
 266
 267                if (write) {
 268                        memcpy_toio(addr, buf, count);
 269                        mb();
 270                        amdgpu_asic_flush_hdp(adev, NULL);
 271                } else {
 272                        amdgpu_asic_invalidate_hdp(adev, NULL);
 273                        mb();
 274                        memcpy_fromio(buf, addr, count);
 275                }
 276
 277                if (count == size)
 278                        return;
 279
 280                pos += count;
 281                buf += count / 4;
 282                size -= count;
 283        }
 284#endif
 285
 286        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 287        for (last = pos + size; pos < last; pos += 4) {
 288                uint32_t tmp = pos >> 31;
 289
 290                WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
 291                if (tmp != hi) {
 292                        WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
 293                        hi = tmp;
 294                }
 295                if (write)
 296                        WREG32_NO_KIQ(mmMM_DATA, *buf++);
 297                else
 298                        *buf++ = RREG32_NO_KIQ(mmMM_DATA);
 299        }
 300        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 301}
 302
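/*
 * Example (illustrative sketch): reading a small block of VRAM into a
 * stack buffer with the helper above.  The VRAM offset used here is an
 * arbitrary, hypothetical value; real callers pass offsets that belong
 * to buffer objects they own.
 *
 *	uint32_t data[4];
 *
 *	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), false);
 *
 * Passing write == true copies from the buffer into VRAM instead.
 */
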
 303/*
 304 * MMIO register access helper functions.
 305 */
 306/**
 307 * amdgpu_mm_rreg - read a memory mapped IO register
 308 *
 309 * @adev: amdgpu_device pointer
 310 * @reg: dword aligned register offset
 311 * @acc_flags: access flags which require special behavior
 312 *
 313 * Returns the 32 bit value from the offset specified.
 314 */
 315uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 316                        uint32_t acc_flags)
 317{
 318        uint32_t ret;
 319
 320        if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 321                return amdgpu_kiq_rreg(adev, reg);
 322
 323        if ((reg * 4) < adev->rmmio_size)
 324                ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 325        else {
 326                unsigned long flags;
 327
 328                spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 329                writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 330                ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 331                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 332        }
 333        trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
 334        return ret;
 335}
 336
 337/*
  338 * MMIO register byte read helper function
  339 * @offset: byte offset from MMIO start
 340 *
 341*/
 342
 343/**
 344 * amdgpu_mm_rreg8 - read a memory mapped IO register
 345 *
 346 * @adev: amdgpu_device pointer
 347 * @offset: byte aligned register offset
 348 *
 349 * Returns the 8 bit value from the offset specified.
 350 */
 351uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
 352        if (offset < adev->rmmio_size)
 353                return (readb(adev->rmmio + offset));
 354        BUG();
 355}
 356
 357/*
  358 * MMIO register byte write helper function
  359 * @offset: byte offset from MMIO start
  360 * @value: the value to be written to the register
 361 *
 362*/
 363/**
  364 * amdgpu_mm_wreg8 - write a memory mapped IO register
 365 *
 366 * @adev: amdgpu_device pointer
 367 * @offset: byte aligned register offset
 368 * @value: 8 bit value to write
 369 *
 370 * Writes the value specified to the offset specified.
 371 */
 372void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
 373        if (offset < adev->rmmio_size)
 374                writeb(value, adev->rmmio + offset);
 375        else
 376                BUG();
 377}
 378
  379static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
 380{
 381        trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
 382
 383        if ((reg * 4) < adev->rmmio_size)
 384                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 385        else {
 386                unsigned long flags;
 387
 388                spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 389                writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 390                writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 391                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 392        }
 393}
 394
 395/**
 396 * amdgpu_mm_wreg - write to a memory mapped IO register
 397 *
 398 * @adev: amdgpu_device pointer
 399 * @reg: dword aligned register offset
 400 * @v: 32 bit value to write to the register
 401 * @acc_flags: access flags which require special behavior
 402 *
 403 * Writes the value specified to the offset specified.
 404 */
 405void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 406                    uint32_t acc_flags)
 407{
 408        if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 409                return amdgpu_kiq_wreg(adev, reg, v);
 410
 411        amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
 412}
 413
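/*
 * Example (illustrative sketch): a read-modify-write of a register using
 * the helpers above.  The register offset and bit mask are placeholders;
 * driver code normally goes through the RREG32()/WREG32() wrappers which
 * expand to these functions.
 *
 *	uint32_t tmp;
 *
 *	tmp = amdgpu_mm_rreg(adev, reg, 0);
 *	tmp |= 0x1;
 *	amdgpu_mm_wreg(adev, reg, tmp, 0);
 *
 * Passing AMDGPU_REGS_NO_KIQ in acc_flags forces a direct MMIO access and
 * bypasses the KIQ path otherwise taken under SR-IOV runtime.
 */
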
 414/*
  415 * amdgpu_mm_wreg_mmio_rlc - write a register either via MMIO or via the RLC path if in range
  416 *
  417 * This function is invoked only for debugfs register access.
  418 */
 419void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 420                    uint32_t acc_flags)
 421{
 422        if (amdgpu_sriov_fullaccess(adev) &&
 423                adev->gfx.rlc.funcs &&
 424                adev->gfx.rlc.funcs->is_rlcg_access_range) {
 425
 426                if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
 427                        return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
 428        }
 429
 430        amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
 431}
 432
 433/**
 434 * amdgpu_io_rreg - read an IO register
 435 *
 436 * @adev: amdgpu_device pointer
 437 * @reg: dword aligned register offset
 438 *
 439 * Returns the 32 bit value from the offset specified.
 440 */
 441u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 442{
 443        if ((reg * 4) < adev->rio_mem_size)
 444                return ioread32(adev->rio_mem + (reg * 4));
 445        else {
 446                iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
 447                return ioread32(adev->rio_mem + (mmMM_DATA * 4));
 448        }
 449}
 450
 451/**
 452 * amdgpu_io_wreg - write to an IO register
 453 *
 454 * @adev: amdgpu_device pointer
 455 * @reg: dword aligned register offset
 456 * @v: 32 bit value to write to the register
 457 *
 458 * Writes the value specified to the offset specified.
 459 */
 460void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 461{
 462        if ((reg * 4) < adev->rio_mem_size)
 463                iowrite32(v, adev->rio_mem + (reg * 4));
 464        else {
 465                iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
 466                iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
 467        }
 468}
 469
 470/**
 471 * amdgpu_mm_rdoorbell - read a doorbell dword
 472 *
 473 * @adev: amdgpu_device pointer
 474 * @index: doorbell index
 475 *
 476 * Returns the value in the doorbell aperture at the
 477 * requested doorbell index (CIK).
 478 */
 479u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
 480{
 481        if (index < adev->doorbell.num_doorbells) {
 482                return readl(adev->doorbell.ptr + index);
 483        } else {
 484                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 485                return 0;
 486        }
 487}
 488
 489/**
 490 * amdgpu_mm_wdoorbell - write a doorbell dword
 491 *
 492 * @adev: amdgpu_device pointer
 493 * @index: doorbell index
 494 * @v: value to write
 495 *
 496 * Writes @v to the doorbell aperture at the
 497 * requested doorbell index (CIK).
 498 */
 499void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 500{
 501        if (index < adev->doorbell.num_doorbells) {
 502                writel(v, adev->doorbell.ptr + index);
 503        } else {
 504                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 505        }
 506}
 507
 508/**
 509 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 510 *
 511 * @adev: amdgpu_device pointer
 512 * @index: doorbell index
 513 *
 514 * Returns the value in the doorbell aperture at the
 515 * requested doorbell index (VEGA10+).
 516 */
 517u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
 518{
 519        if (index < adev->doorbell.num_doorbells) {
 520                return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
 521        } else {
 522                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 523                return 0;
 524        }
 525}
 526
 527/**
 528 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 529 *
 530 * @adev: amdgpu_device pointer
 531 * @index: doorbell index
 532 * @v: value to write
 533 *
 534 * Writes @v to the doorbell aperture at the
 535 * requested doorbell index (VEGA10+).
 536 */
 537void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
 538{
 539        if (index < adev->doorbell.num_doorbells) {
 540                atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
 541        } else {
 542                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 543        }
 544}
 545
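/*
 * Example (illustrative sketch): ringing a ring buffer's doorbell with the
 * helpers above.  The ring fields used here (use_doorbell, doorbell_index,
 * wptr) follow the amdgpu_ring layout, but the snippet itself is only an
 * illustration, not driver code.
 *
 *	if (ring->use_doorbell)
 *		amdgpu_mm_wdoorbell(adev, ring->doorbell_index,
 *				    lower_32_bits(ring->wptr));
 *
 * On VEGA10 and newer, 64 bit doorbells are written with
 * amdgpu_mm_wdoorbell64() instead.
 */
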
 546/**
 547 * amdgpu_invalid_rreg - dummy reg read function
 548 *
 549 * @adev: amdgpu device pointer
 550 * @reg: offset of register
 551 *
 552 * Dummy register read function.  Used for register blocks
 553 * that certain asics don't have (all asics).
 554 * Returns the value in the register.
 555 */
 556static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
 557{
 558        DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
 559        BUG();
 560        return 0;
 561}
 562
 563/**
 564 * amdgpu_invalid_wreg - dummy reg write function
 565 *
 566 * @adev: amdgpu device pointer
 567 * @reg: offset of register
 568 * @v: value to write to the register
 569 *
  570 * Dummy register write function.  Used for register blocks
 571 * that certain asics don't have (all asics).
 572 */
 573static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 574{
 575        DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
 576                  reg, v);
 577        BUG();
 578}
 579
 580/**
 581 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 582 *
 583 * @adev: amdgpu device pointer
 584 * @reg: offset of register
 585 *
 586 * Dummy register read function.  Used for register blocks
 587 * that certain asics don't have (all asics).
 588 * Returns the value in the register.
 589 */
 590static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
 591{
 592        DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
 593        BUG();
 594        return 0;
 595}
 596
 597/**
 598 * amdgpu_invalid_wreg64 - dummy reg write function
 599 *
 600 * @adev: amdgpu device pointer
 601 * @reg: offset of register
 602 * @v: value to write to the register
 603 *
  604 * Dummy register write function.  Used for register blocks
 605 * that certain asics don't have (all asics).
 606 */
 607static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
 608{
 609        DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
 610                  reg, v);
 611        BUG();
 612}
 613
 614/**
 615 * amdgpu_block_invalid_rreg - dummy reg read function
 616 *
 617 * @adev: amdgpu device pointer
 618 * @block: offset of instance
 619 * @reg: offset of register
 620 *
 621 * Dummy register read function.  Used for register blocks
 622 * that certain asics don't have (all asics).
 623 * Returns the value in the register.
 624 */
 625static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
 626                                          uint32_t block, uint32_t reg)
 627{
 628        DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
 629                  reg, block);
 630        BUG();
 631        return 0;
 632}
 633
 634/**
 635 * amdgpu_block_invalid_wreg - dummy reg write function
 636 *
 637 * @adev: amdgpu device pointer
 638 * @block: offset of instance
 639 * @reg: offset of register
 640 * @v: value to write to the register
 641 *
  642 * Dummy register write function.  Used for register blocks
 643 * that certain asics don't have (all asics).
 644 */
 645static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 646                                      uint32_t block,
 647                                      uint32_t reg, uint32_t v)
 648{
 649        DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
 650                  reg, block, v);
 651        BUG();
 652}
 653
 654/**
 655 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 656 *
 657 * @adev: amdgpu device pointer
 658 *
 659 * Allocates a scratch page of VRAM for use by various things in the
 660 * driver.
 661 */
 662static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 663{
 664        return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
 665                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
 666                                       &adev->vram_scratch.robj,
 667                                       &adev->vram_scratch.gpu_addr,
 668                                       (void **)&adev->vram_scratch.ptr);
 669}
 670
 671/**
 672 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 673 *
 674 * @adev: amdgpu device pointer
 675 *
 676 * Frees the VRAM scratch page.
 677 */
 678static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
 679{
 680        amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
 681}
 682
 683/**
 684 * amdgpu_device_program_register_sequence - program an array of registers.
 685 *
 686 * @adev: amdgpu_device pointer
 687 * @registers: pointer to the register array
 688 * @array_size: size of the register array
 689 *
  690 * Programs an array of registers with AND and OR masks.
 691 * This is a helper for setting golden registers.
 692 */
 693void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 694                                             const u32 *registers,
 695                                             const u32 array_size)
 696{
 697        u32 tmp, reg, and_mask, or_mask;
 698        int i;
 699
 700        if (array_size % 3)
 701                return;
 702
  703        for (i = 0; i < array_size; i += 3) {
 704                reg = registers[i + 0];
 705                and_mask = registers[i + 1];
 706                or_mask = registers[i + 2];
 707
 708                if (and_mask == 0xffffffff) {
 709                        tmp = or_mask;
 710                } else {
 711                        tmp = RREG32(reg);
 712                        tmp &= ~and_mask;
 713                        if (adev->family >= AMDGPU_FAMILY_AI)
 714                                tmp |= (or_mask & and_mask);
 715                        else
 716                                tmp |= or_mask;
 717                }
 718                WREG32(reg, tmp);
 719        }
 720}
 721
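/*
 * Example (illustrative sketch): a golden register list consumed by
 * amdgpu_device_program_register_sequence().  Entries are
 * {register offset, AND mask, OR mask} triplets; the offsets and masks
 * below are hypothetical.
 *
 *	static const u32 golden_settings_example[] = {
 *		mmREG_A, 0xffffffff, 0x00000100,
 *		mmREG_B, 0x0000ff00, 0x00001200,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev,
 *						golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 *
 * An AND mask of 0xffffffff writes the OR mask value directly; otherwise
 * the bits selected by the AND mask are cleared and the OR mask is merged in.
 */
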
 722/**
 723 * amdgpu_device_pci_config_reset - reset the GPU
 724 *
 725 * @adev: amdgpu_device pointer
 726 *
 727 * Resets the GPU using the pci config reset sequence.
 728 * Only applicable to asics prior to vega10.
 729 */
 730void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
 731{
 732        pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
 733}
 734
 735/*
 736 * GPU doorbell aperture helpers function.
 737 */
 738/**
 739 * amdgpu_device_doorbell_init - Init doorbell driver information.
 740 *
 741 * @adev: amdgpu_device pointer
 742 *
 743 * Init doorbell driver information (CIK)
 744 * Returns 0 on success, error on failure.
 745 */
 746static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
 747{
 748
 749        /* No doorbell on SI hardware generation */
 750        if (adev->asic_type < CHIP_BONAIRE) {
 751                adev->doorbell.base = 0;
 752                adev->doorbell.size = 0;
 753                adev->doorbell.num_doorbells = 0;
 754                adev->doorbell.ptr = NULL;
 755                return 0;
 756        }
 757
 758        if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
 759                return -EINVAL;
 760
 761        amdgpu_asic_init_doorbell_index(adev);
 762
 763        /* doorbell bar mapping */
 764        adev->doorbell.base = pci_resource_start(adev->pdev, 2);
 765        adev->doorbell.size = pci_resource_len(adev->pdev, 2);
 766
 767        adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
 768                                             adev->doorbell_index.max_assignment+1);
 769        if (adev->doorbell.num_doorbells == 0)
 770                return -EINVAL;
 771
 772        /* For Vega, reserve and map two pages on doorbell BAR since SDMA
  773         * paging queue doorbells use the second page. The
  774         * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
  775         * doorbells are in the first page. So with paging queue enabled,
  776         * the max num_doorbells should be extended by 1 page (0x400 in dwords)
 777         */
 778        if (adev->asic_type >= CHIP_VEGA10)
 779                adev->doorbell.num_doorbells += 0x400;
 780
 781        adev->doorbell.ptr = ioremap(adev->doorbell.base,
 782                                     adev->doorbell.num_doorbells *
 783                                     sizeof(u32));
 784        if (adev->doorbell.ptr == NULL)
 785                return -ENOMEM;
 786
 787        return 0;
 788}
 789
 790/**
 791 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 792 *
 793 * @adev: amdgpu_device pointer
 794 *
 795 * Tear down doorbell driver information (CIK)
 796 */
 797static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
 798{
 799        iounmap(adev->doorbell.ptr);
 800        adev->doorbell.ptr = NULL;
 801}
 802
 803
 804
 805/*
 806 * amdgpu_device_wb_*()
 807 * Writeback is the method by which the GPU updates special pages in memory
  808 * with the status of certain GPU events (fences, ring pointers, etc.).
 809 */
 810
 811/**
 812 * amdgpu_device_wb_fini - Disable Writeback and free memory
 813 *
 814 * @adev: amdgpu_device pointer
 815 *
 816 * Disables Writeback and frees the Writeback memory (all asics).
 817 * Used at driver shutdown.
 818 */
 819static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
 820{
 821        if (adev->wb.wb_obj) {
 822                amdgpu_bo_free_kernel(&adev->wb.wb_obj,
 823                                      &adev->wb.gpu_addr,
 824                                      (void **)&adev->wb.wb);
 825                adev->wb.wb_obj = NULL;
 826        }
 827}
 828
 829/**
 830 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
 831 *
 832 * @adev: amdgpu_device pointer
 833 *
 834 * Initializes writeback and allocates writeback memory (all asics).
 835 * Used at driver startup.
  836 * Returns 0 on success or a negative error code on failure.
 837 */
 838static int amdgpu_device_wb_init(struct amdgpu_device *adev)
 839{
 840        int r;
 841
 842        if (adev->wb.wb_obj == NULL) {
 843                /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
 844                r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
 845                                            PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
 846                                            &adev->wb.wb_obj, &adev->wb.gpu_addr,
 847                                            (void **)&adev->wb.wb);
 848                if (r) {
 849                        dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
 850                        return r;
 851                }
 852
 853                adev->wb.num_wb = AMDGPU_MAX_WB;
 854                memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 855
 856                /* clear wb memory */
 857                memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
 858        }
 859
 860        return 0;
 861}
 862
 863/**
 864 * amdgpu_device_wb_get - Allocate a wb entry
 865 *
 866 * @adev: amdgpu_device pointer
 867 * @wb: wb index
 868 *
 869 * Allocate a wb slot for use by the driver (all asics).
 870 * Returns 0 on success or -EINVAL on failure.
 871 */
 872int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
 873{
 874        unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
 875
 876        if (offset < adev->wb.num_wb) {
 877                __set_bit(offset, adev->wb.used);
 878                *wb = offset << 3; /* convert to dw offset */
 879                return 0;
 880        } else {
 881                return -EINVAL;
 882        }
 883}
 884
 885/**
 886 * amdgpu_device_wb_free - Free a wb entry
 887 *
 888 * @adev: amdgpu_device pointer
 889 * @wb: wb index
 890 *
 891 * Free a wb slot allocated for use by the driver (all asics)
 892 */
 893void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 894{
 895        wb >>= 3;
 896        if (wb < adev->wb.num_wb)
 897                __clear_bit(wb, adev->wb.used);
 898}
 899
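/*
 * Example (illustrative sketch): allocating a writeback slot and deriving
 * its CPU and GPU addresses.  The returned index is a dword offset into
 * the writeback buffer, so the GPU address is gpu_addr plus index * 4.
 *
 *	u32 wb;
 *	volatile uint32_t *cpu_ptr;
 *	uint64_t gpu_addr;
 *	int r;
 *
 *	r = amdgpu_device_wb_get(adev, &wb);
 *	if (r)
 *		return r;
 *	cpu_ptr = &adev->wb.wb[wb];
 *	gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *	...
 *	amdgpu_device_wb_free(adev, wb);
 */
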
 900/**
 901 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 902 *
 903 * @adev: amdgpu_device pointer
 904 *
 905 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
  906 * to fail, but if any of the BARs is not accessible after the resize we abort
 907 * driver loading by returning -ENODEV.
 908 */
 909int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 910{
 911        u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
 912        u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
 913        struct pci_bus *root;
 914        struct resource *res;
 915        unsigned i;
 916        u16 cmd;
 917        int r;
 918
 919        /* Bypass for VF */
 920        if (amdgpu_sriov_vf(adev))
 921                return 0;
 922
 923        /* skip if the bios has already enabled large BAR */
 924        if (adev->gmc.real_vram_size &&
 925            (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
 926                return 0;
 927
 928        /* Check if the root BUS has 64bit memory resources */
 929        root = adev->pdev->bus;
 930        while (root->parent)
 931                root = root->parent;
 932
 933        pci_bus_for_each_resource(root, res, i) {
 934                if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
 935                    res->start > 0x100000000ull)
 936                        break;
 937        }
 938
 939        /* Trying to resize is pointless without a root hub window above 4GB */
 940        if (!res)
 941                return 0;
 942
 943        /* Disable memory decoding while we change the BAR addresses and size */
 944        pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
 945        pci_write_config_word(adev->pdev, PCI_COMMAND,
 946                              cmd & ~PCI_COMMAND_MEMORY);
 947
 948        /* Free the VRAM and doorbell BAR, we most likely need to move both. */
 949        amdgpu_device_doorbell_fini(adev);
 950        if (adev->asic_type >= CHIP_BONAIRE)
 951                pci_release_resource(adev->pdev, 2);
 952
 953        pci_release_resource(adev->pdev, 0);
 954
 955        r = pci_resize_resource(adev->pdev, 0, rbar_size);
 956        if (r == -ENOSPC)
 957                DRM_INFO("Not enough PCI address space for a large BAR.");
 958        else if (r && r != -ENOTSUPP)
 959                DRM_ERROR("Problem resizing BAR0 (%d).", r);
 960
 961        pci_assign_unassigned_bus_resources(adev->pdev->bus);
 962
 963        /* When the doorbell or fb BAR isn't available we have no chance of
 964         * using the device.
 965         */
 966        r = amdgpu_device_doorbell_init(adev);
 967        if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
 968                return -ENODEV;
 969
 970        pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
 971
 972        return 0;
 973}
 974
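/*
 * Worked example for the rbar_size computation above (an illustrative
 * calculation, not driver code): with 8 GB of VRAM, space_needed is
 * 0x200000000, (space_needed >> 20) | 1 is 0x2001, and
 * order_base_2(0x2001) - 1 = 13, which corresponds to a 2^13 MB = 8 GB
 * resizable BAR.
 */
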
 975/*
 976 * GPU helpers function.
 977 */
 978/**
  979 * amdgpu_device_need_post - check if the hw needs to be posted or not
 980 *
 981 * @adev: amdgpu_device pointer
 982 *
  983 * Check if the asic has been initialized (all asics) at driver startup
  984 * or if posting is needed because a hw reset was performed.
  985 * Returns true if posting is needed, false if not.
 986 */
 987bool amdgpu_device_need_post(struct amdgpu_device *adev)
 988{
 989        uint32_t reg;
 990
 991        if (amdgpu_sriov_vf(adev))
 992                return false;
 993
 994        if (amdgpu_passthrough(adev)) {
  995                /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
  996                 * reboot some old SMC firmware still needs the driver to do vPost, otherwise
  997                 * the GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so
  998                 * force vPost for SMC versions below 22.15.
 999                 */
1000                if (adev->asic_type == CHIP_FIJI) {
1001                        int err;
1002                        uint32_t fw_ver;
1003                        err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
 1004                        /* force vPost if an error occurred */
1005                        if (err)
1006                                return true;
1007
1008                        fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1009                        if (fw_ver < 0x00160e00)
1010                                return true;
1011                }
1012        }
1013
1014        if (adev->has_hw_reset) {
1015                adev->has_hw_reset = false;
1016                return true;
1017        }
1018
1019        /* bios scratch used on CIK+ */
1020        if (adev->asic_type >= CHIP_BONAIRE)
1021                return amdgpu_atombios_scratch_need_asic_init(adev);
1022
1023        /* check MEM_SIZE for older asics */
1024        reg = amdgpu_asic_get_config_memsize(adev);
1025
1026        if ((reg != 0) && (reg != 0xffffffff))
1027                return false;
1028
1029        return true;
1030}
1031
1032/* if we get transitioned to only one device, take VGA back */
1033/**
1034 * amdgpu_device_vga_set_decode - enable/disable vga decode
1035 *
1036 * @cookie: amdgpu_device pointer
1037 * @state: enable/disable vga decode
1038 *
1039 * Enable/disable vga decode (all asics).
1040 * Returns VGA resource flags.
1041 */
1042static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
1043{
1044        struct amdgpu_device *adev = cookie;
1045        amdgpu_asic_set_vga_state(adev, state);
1046        if (state)
1047                return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1048                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1049        else
1050                return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1051}
1052
1053/**
1054 * amdgpu_device_check_block_size - validate the vm block size
1055 *
1056 * @adev: amdgpu_device pointer
1057 *
1058 * Validates the vm block size specified via module parameter.
1059 * The vm block size defines number of bits in page table versus page directory,
1060 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1061 * page table and the remaining bits are in the page directory.
1062 */
1063static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1064{
1065        /* defines number of bits in page table versus page directory,
1066         * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1067         * page table and the remaining bits are in the page directory */
1068        if (amdgpu_vm_block_size == -1)
1069                return;
1070
1071        if (amdgpu_vm_block_size < 9) {
1072                dev_warn(adev->dev, "VM page table size (%d) too small\n",
1073                         amdgpu_vm_block_size);
1074                amdgpu_vm_block_size = -1;
1075        }
1076}
1077
1078/**
1079 * amdgpu_device_check_vm_size - validate the vm size
1080 *
1081 * @adev: amdgpu_device pointer
1082 *
1083 * Validates the vm size in GB specified via module parameter.
1084 * The VM size is the size of the GPU virtual memory space in GB.
1085 */
1086static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1087{
1088        /* no need to check the default value */
1089        if (amdgpu_vm_size == -1)
1090                return;
1091
1092        if (amdgpu_vm_size < 1) {
1093                dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1094                         amdgpu_vm_size);
1095                amdgpu_vm_size = -1;
1096        }
1097}
1098
1099static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1100{
1101        struct sysinfo si;
1102        bool is_os_64 = (sizeof(void *) == 8);
1103        uint64_t total_memory;
1104        uint64_t dram_size_seven_GB = 0x1B8000000;
1105        uint64_t dram_size_three_GB = 0xB8000000;
1106
1107        if (amdgpu_smu_memory_pool_size == 0)
1108                return;
1109
1110        if (!is_os_64) {
1111                DRM_WARN("Not 64-bit OS, feature not supported\n");
1112                goto def_value;
1113        }
1114        si_meminfo(&si);
1115        total_memory = (uint64_t)si.totalram * si.mem_unit;
1116
1117        if ((amdgpu_smu_memory_pool_size == 1) ||
1118                (amdgpu_smu_memory_pool_size == 2)) {
1119                if (total_memory < dram_size_three_GB)
1120                        goto def_value1;
1121        } else if ((amdgpu_smu_memory_pool_size == 4) ||
1122                (amdgpu_smu_memory_pool_size == 8)) {
1123                if (total_memory < dram_size_seven_GB)
1124                        goto def_value1;
1125        } else {
1126                DRM_WARN("Smu memory pool size not supported\n");
1127                goto def_value;
1128        }
1129        adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1130
1131        return;
1132
1133def_value1:
 1134        DRM_WARN("Not enough system memory\n");
1135def_value:
1136        adev->pm.smu_prv_buffer_size = 0;
1137}
1138
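/*
 * Worked example for the pool size conversion above (illustrative only):
 * amdgpu_smu_memory_pool_size is given in units of 256 MB, so a module
 * parameter value of 2 yields 2 << 28 = 0x20000000 bytes (512 MB) and a
 * value of 8 yields 2 GB, provided the system DRAM checks above pass.
 */
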
1139/**
1140 * amdgpu_device_check_arguments - validate module params
1141 *
1142 * @adev: amdgpu_device pointer
1143 *
1144 * Validates certain module parameters and updates
1145 * the associated values used by the driver (all asics).
1146 */
1147static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1148{
1149        if (amdgpu_sched_jobs < 4) {
1150                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1151                         amdgpu_sched_jobs);
1152                amdgpu_sched_jobs = 4;
1153        } else if (!is_power_of_2(amdgpu_sched_jobs)){
1154                dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1155                         amdgpu_sched_jobs);
1156                amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1157        }
1158
1159        if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
 1160                /* gart size must be greater than or equal to 32M */
1161                dev_warn(adev->dev, "gart size (%d) too small\n",
1162                         amdgpu_gart_size);
1163                amdgpu_gart_size = -1;
1164        }
1165
1166        if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
 1167                /* gtt size must be greater than or equal to 32M */
1168                dev_warn(adev->dev, "gtt size (%d) too small\n",
1169                                 amdgpu_gtt_size);
1170                amdgpu_gtt_size = -1;
1171        }
1172
1173        /* valid range is between 4 and 9 inclusive */
1174        if (amdgpu_vm_fragment_size != -1 &&
1175            (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1176                dev_warn(adev->dev, "valid range is between 4 and 9\n");
1177                amdgpu_vm_fragment_size = -1;
1178        }
1179
1180        if (amdgpu_sched_hw_submission < 2) {
1181                dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1182                         amdgpu_sched_hw_submission);
1183                amdgpu_sched_hw_submission = 2;
1184        } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1185                dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1186                         amdgpu_sched_hw_submission);
1187                amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1188        }
1189
1190        amdgpu_device_check_smu_prv_buffer_size(adev);
1191
1192        amdgpu_device_check_vm_size(adev);
1193
1194        amdgpu_device_check_block_size(adev);
1195
1196        adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1197
1198        amdgpu_gmc_tmz_set(adev);
1199
1200        return 0;
1201}
1202
1203/**
1204 * amdgpu_switcheroo_set_state - set switcheroo state
1205 *
1206 * @pdev: pci dev pointer
1207 * @state: vga_switcheroo state
1208 *
1209 * Callback for the switcheroo driver.  Suspends or resumes the
 1210 * asics before or after it is powered up using ACPI methods.
1211 */
1212static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1213{
1214        struct drm_device *dev = pci_get_drvdata(pdev);
1215        int r;
1216
1217        if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
1218                return;
1219
1220        if (state == VGA_SWITCHEROO_ON) {
1221                pr_info("switched on\n");
1222                /* don't suspend or resume card normally */
1223                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1224
1225                pci_set_power_state(dev->pdev, PCI_D0);
1226                pci_restore_state(dev->pdev);
1227                r = pci_enable_device(dev->pdev);
1228                if (r)
1229                        DRM_WARN("pci_enable_device failed (%d)\n", r);
1230                amdgpu_device_resume(dev, true);
1231
1232                dev->switch_power_state = DRM_SWITCH_POWER_ON;
1233                drm_kms_helper_poll_enable(dev);
1234        } else {
1235                pr_info("switched off\n");
1236                drm_kms_helper_poll_disable(dev);
1237                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1238                amdgpu_device_suspend(dev, true);
1239                pci_save_state(dev->pdev);
1240                /* Shut down the device */
1241                pci_disable_device(dev->pdev);
1242                pci_set_power_state(dev->pdev, PCI_D3cold);
1243                dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1244        }
1245}
1246
1247/**
1248 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1249 *
1250 * @pdev: pci dev pointer
1251 *
 1252 * Callback for the switcheroo driver.  Checks if the switcheroo
1253 * state can be changed.
1254 * Returns true if the state can be changed, false if not.
1255 */
1256static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1257{
1258        struct drm_device *dev = pci_get_drvdata(pdev);
1259
1260        /*
1261        * FIXME: open_count is protected by drm_global_mutex but that would lead to
1262        * locking inversion with the driver load path. And the access here is
1263        * completely racy anyway. So don't bother with locking for now.
1264        */
1265        return atomic_read(&dev->open_count) == 0;
1266}
1267
1268static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1269        .set_gpu_state = amdgpu_switcheroo_set_state,
1270        .reprobe = NULL,
1271        .can_switch = amdgpu_switcheroo_can_switch,
1272};
1273
1274/**
1275 * amdgpu_device_ip_set_clockgating_state - set the CG state
1276 *
1277 * @dev: amdgpu_device pointer
1278 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1279 * @state: clockgating state (gate or ungate)
1280 *
1281 * Sets the requested clockgating state for all instances of
1282 * the hardware IP specified.
1283 * Returns the error code from the last instance.
1284 */
1285int amdgpu_device_ip_set_clockgating_state(void *dev,
1286                                           enum amd_ip_block_type block_type,
1287                                           enum amd_clockgating_state state)
1288{
1289        struct amdgpu_device *adev = dev;
1290        int i, r = 0;
1291
1292        for (i = 0; i < adev->num_ip_blocks; i++) {
1293                if (!adev->ip_blocks[i].status.valid)
1294                        continue;
1295                if (adev->ip_blocks[i].version->type != block_type)
1296                        continue;
1297                if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1298                        continue;
1299                r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1300                        (void *)adev, state);
1301                if (r)
1302                        DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1303                                  adev->ip_blocks[i].version->funcs->name, r);
1304        }
1305        return r;
1306}
1307
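/*
 * Example (illustrative sketch): gating GFX clocks through the helper
 * above, using the amd_shared.h block type and clockgating state enums.
 * Error handling is elided.
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */
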
1308/**
1309 * amdgpu_device_ip_set_powergating_state - set the PG state
1310 *
1311 * @dev: amdgpu_device pointer
1312 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1313 * @state: powergating state (gate or ungate)
1314 *
1315 * Sets the requested powergating state for all instances of
1316 * the hardware IP specified.
1317 * Returns the error code from the last instance.
1318 */
1319int amdgpu_device_ip_set_powergating_state(void *dev,
1320                                           enum amd_ip_block_type block_type,
1321                                           enum amd_powergating_state state)
1322{
1323        struct amdgpu_device *adev = dev;
1324        int i, r = 0;
1325
1326        for (i = 0; i < adev->num_ip_blocks; i++) {
1327                if (!adev->ip_blocks[i].status.valid)
1328                        continue;
1329                if (adev->ip_blocks[i].version->type != block_type)
1330                        continue;
1331                if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1332                        continue;
1333                r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1334                        (void *)adev, state);
1335                if (r)
1336                        DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1337                                  adev->ip_blocks[i].version->funcs->name, r);
1338        }
1339        return r;
1340}
1341
1342/**
1343 * amdgpu_device_ip_get_clockgating_state - get the CG state
1344 *
1345 * @adev: amdgpu_device pointer
1346 * @flags: clockgating feature flags
1347 *
1348 * Walks the list of IPs on the device and updates the clockgating
1349 * flags for each IP.
1350 * Updates @flags with the feature flags for each hardware IP where
1351 * clockgating is enabled.
1352 */
1353void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1354                                            u32 *flags)
1355{
1356        int i;
1357
1358        for (i = 0; i < adev->num_ip_blocks; i++) {
1359                if (!adev->ip_blocks[i].status.valid)
1360                        continue;
1361                if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1362                        adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1363        }
1364}
1365
1366/**
1367 * amdgpu_device_ip_wait_for_idle - wait for idle
1368 *
1369 * @adev: amdgpu_device pointer
1370 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1371 *
 1372 * Waits for the requested hardware IP to be idle.
1373 * Returns 0 for success or a negative error code on failure.
1374 */
1375int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1376                                   enum amd_ip_block_type block_type)
1377{
1378        int i, r;
1379
1380        for (i = 0; i < adev->num_ip_blocks; i++) {
1381                if (!adev->ip_blocks[i].status.valid)
1382                        continue;
1383                if (adev->ip_blocks[i].version->type == block_type) {
1384                        r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1385                        if (r)
1386                                return r;
1387                        break;
1388                }
1389        }
1390        return 0;
1391
1392}
1393
1394/**
1395 * amdgpu_device_ip_is_idle - is the hardware IP idle
1396 *
1397 * @adev: amdgpu_device pointer
1398 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1399 *
1400 * Check if the hardware IP is idle or not.
 1401 * Returns true if the IP is idle, false if not.
1402 */
1403bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1404                              enum amd_ip_block_type block_type)
1405{
1406        int i;
1407
1408        for (i = 0; i < adev->num_ip_blocks; i++) {
1409                if (!adev->ip_blocks[i].status.valid)
1410                        continue;
1411                if (adev->ip_blocks[i].version->type == block_type)
1412                        return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1413        }
1414        return true;
1415
1416}
1417
1418/**
1419 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1420 *
1421 * @adev: amdgpu_device pointer
1422 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1423 *
1424 * Returns a pointer to the hardware IP block structure
1425 * if it exists for the asic, otherwise NULL.
1426 */
1427struct amdgpu_ip_block *
1428amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1429                              enum amd_ip_block_type type)
1430{
1431        int i;
1432
1433        for (i = 0; i < adev->num_ip_blocks; i++)
1434                if (adev->ip_blocks[i].version->type == type)
1435                        return &adev->ip_blocks[i];
1436
1437        return NULL;
1438}
1439
1440/**
1441 * amdgpu_device_ip_block_version_cmp
1442 *
1443 * @adev: amdgpu_device pointer
1444 * @type: enum amd_ip_block_type
1445 * @major: major version
1446 * @minor: minor version
1447 *
 1448 * Returns 0 if the IP block's version is equal to or greater than the
 1449 * specified (major, minor) version, 1 if it is smaller or the ip_block doesn't exist.
1450 */
1451int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1452                                       enum amd_ip_block_type type,
1453                                       u32 major, u32 minor)
1454{
1455        struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1456
1457        if (ip_block && ((ip_block->version->major > major) ||
1458                        ((ip_block->version->major == major) &&
1459                        (ip_block->version->minor >= minor))))
1460                return 0;
1461
1462        return 1;
1463}
1464
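/*
 * Example (illustrative sketch): only run a code path when the GFX IP
 * block is at least version 9.0.  A return value of 0 means "equal or
 * greater".
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 9, 0)) {
 *		... GFX v9.0+ specific setup ...
 *	}
 */
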
1465/**
1466 * amdgpu_device_ip_block_add
1467 *
1468 * @adev: amdgpu_device pointer
1469 * @ip_block_version: pointer to the IP to add
1470 *
1471 * Adds the IP block driver information to the collection of IPs
1472 * on the asic.
1473 */
1474int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1475                               const struct amdgpu_ip_block_version *ip_block_version)
1476{
1477        if (!ip_block_version)
1478                return -EINVAL;
1479
1480        DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1481                  ip_block_version->funcs->name);
1482
1483        adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1484
1485        return 0;
1486}
1487
1488/**
1489 * amdgpu_device_enable_virtual_display - enable virtual display feature
1490 *
1491 * @adev: amdgpu_device pointer
1492 *
 1493 * Enables the virtual display feature if the user has enabled it via
1494 * the module parameter virtual_display.  This feature provides a virtual
1495 * display hardware on headless boards or in virtualized environments.
1496 * This function parses and validates the configuration string specified by
 1497 * the user and configures the virtual display configuration (number of
1498 * virtual connectors, crtcs, etc.) specified.
1499 */
1500static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1501{
1502        adev->enable_virtual_display = false;
1503
1504        if (amdgpu_virtual_display) {
1505                struct drm_device *ddev = adev->ddev;
1506                const char *pci_address_name = pci_name(ddev->pdev);
1507                char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1508
1509                pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1510                pciaddstr_tmp = pciaddstr;
1511                while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1512                        pciaddname = strsep(&pciaddname_tmp, ",");
1513                        if (!strcmp("all", pciaddname)
1514                            || !strcmp(pci_address_name, pciaddname)) {
1515                                long num_crtc;
1516                                int res = -1;
1517
1518                                adev->enable_virtual_display = true;
1519
1520                                if (pciaddname_tmp)
1521                                        res = kstrtol(pciaddname_tmp, 10,
1522                                                      &num_crtc);
1523
1524                                if (!res) {
1525                                        if (num_crtc < 1)
1526                                                num_crtc = 1;
1527                                        if (num_crtc > 6)
1528                                                num_crtc = 6;
1529                                        adev->mode_info.num_crtc = num_crtc;
1530                                } else {
1531                                        adev->mode_info.num_crtc = 1;
1532                                }
1533                                break;
1534                        }
1535                }
1536
1537                DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1538                         amdgpu_virtual_display, pci_address_name,
1539                         adev->enable_virtual_display, adev->mode_info.num_crtc);
1540
1541                kfree(pciaddstr);
1542        }
1543}
1544
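/*
 * Example (illustrative sketch): module parameter strings accepted by the
 * parser above.  Entries are separated by ';'; each entry is a PCI address
 * (or "all") optionally followed by ",<num_crtc>".  The addresses below
 * are hypothetical.
 *
 *	amdgpu.virtual_display=0000:04:00.0,2
 *	amdgpu.virtual_display=0000:04:00.0;0000:05:00.0,4
 *	amdgpu.virtual_display=all,1
 *
 * num_crtc is clamped to the range 1..6 and defaults to 1 when omitted or
 * unparsable.
 */
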
1545/**
1546 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1547 *
1548 * @adev: amdgpu_device pointer
1549 *
1550 * Parses the asic configuration parameters specified in the gpu info
 1551 * firmware and makes them available to the driver for use in configuring
1552 * the asic.
1553 * Returns 0 on success, -EINVAL on failure.
1554 */
1555static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1556{
1557        const char *chip_name;
1558        char fw_name[40];
1559        int err;
1560        const struct gpu_info_firmware_header_v1_0 *hdr;
1561
1562        adev->firmware.gpu_info_fw = NULL;
1563
1564        if (adev->discovery_bin) {
1565                amdgpu_discovery_get_gfx_info(adev);
1566
1567                /*
1568                 * FIXME: The bounding box is still needed by Navi12, so
 1569                 * temporarily read it from gpu_info firmware. Should be dropped
1570                 * when DAL no longer needs it.
1571                 */
1572                if (adev->asic_type != CHIP_NAVI12)
1573                        return 0;
1574        }
1575
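        /*
         * ASICs handled by the default case below have no gpu_info firmware to
         * load (newer ASICs get this data from the IP discovery table handled
         * above).
         */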
1576        switch (adev->asic_type) {
1577#ifdef CONFIG_DRM_AMDGPU_SI
1578        case CHIP_VERDE:
1579        case CHIP_TAHITI:
1580        case CHIP_PITCAIRN:
1581        case CHIP_OLAND:
1582        case CHIP_HAINAN:
1583#endif
1584#ifdef CONFIG_DRM_AMDGPU_CIK
1585        case CHIP_BONAIRE:
1586        case CHIP_HAWAII:
1587        case CHIP_KAVERI:
1588        case CHIP_KABINI:
1589        case CHIP_MULLINS:
1590#endif
1591        case CHIP_TOPAZ:
1592        case CHIP_TONGA:
1593        case CHIP_FIJI:
1594        case CHIP_POLARIS10:
1595        case CHIP_POLARIS11:
1596        case CHIP_POLARIS12:
1597        case CHIP_VEGAM:
1598        case CHIP_CARRIZO:
1599        case CHIP_STONEY:
1600        case CHIP_VEGA20:
1601        case CHIP_SIENNA_CICHLID:
1602        case CHIP_NAVY_FLOUNDER:
1603        default:
1604                return 0;
1605        case CHIP_VEGA10:
1606                chip_name = "vega10";
1607                break;
1608        case CHIP_VEGA12:
1609                chip_name = "vega12";
1610                break;
1611        case CHIP_RAVEN:
1612                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1613                        chip_name = "raven2";
1614                else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1615                        chip_name = "picasso";
1616                else
1617                        chip_name = "raven";
1618                break;
1619        case CHIP_ARCTURUS:
1620                chip_name = "arcturus";
1621                break;
1622        case CHIP_RENOIR:
1623                chip_name = "renoir";
1624                break;
1625        case CHIP_NAVI10:
1626                chip_name = "navi10";
1627                break;
1628        case CHIP_NAVI14:
1629                chip_name = "navi14";
1630                break;
1631        case CHIP_NAVI12:
1632                chip_name = "navi12";
1633                break;
1634        }
1635
1636        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1637        err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1638        if (err) {
1639                dev_err(adev->dev,
1640                        "Failed to load gpu_info firmware \"%s\"\n",
1641                        fw_name);
1642                goto out;
1643        }
1644        err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1645        if (err) {
1646                dev_err(adev->dev,
1647                        "Failed to validate gpu_info firmware \"%s\"\n",
1648                        fw_name);
1649                goto out;
1650        }
1651
1652        hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1653        amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1654
1655        switch (hdr->version_major) {
1656        case 1:
1657        {
1658                const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1659                        (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1660                                                                le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1661
1662                /*
1663                 * Should be dropped when DAL no longer needs it.
1664                 */
1665                if (adev->asic_type == CHIP_NAVI12)
1666                        goto parse_soc_bounding_box;
1667
1668                adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1669                adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1670                adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1671                adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1672                adev->gfx.config.max_texture_channel_caches =
1673                        le32_to_cpu(gpu_info_fw->gc_num_tccs);
1674                adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1675                adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1676                adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1677                adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1678                adev->gfx.config.double_offchip_lds_buf =
1679                        le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1680                adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1681                adev->gfx.cu_info.max_waves_per_simd =
1682                        le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1683                adev->gfx.cu_info.max_scratch_slots_per_cu =
1684                        le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1685                adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1686                if (hdr->version_minor >= 1) {
1687                        const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1688                                (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1689                                                                        le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1690                        adev->gfx.config.num_sc_per_sh =
1691                                le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1692                        adev->gfx.config.num_packer_per_sc =
1693                                le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1694                }
1695
1696parse_soc_bounding_box:
1697                /*
1698                 * The SOC bounding box info is not integrated in the discovery table,
1699                 * so it still has to be parsed from the gpu_info firmware when needed.
1700                 */
1701                if (hdr->version_minor == 2) {
1702                        const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1703                                (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1704                                                                        le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1705                        adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1706                }
1707                break;
1708        }
1709        default:
1710                dev_err(adev->dev,
1711                        "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1712                err = -EINVAL;
1713                goto out;
1714        }
1715out:
1716        return err;
1717}
1718
1719/**
1720 * amdgpu_device_ip_early_init - run early init for hardware IPs
1721 *
1722 * @adev: amdgpu_device pointer
1723 *
1724 * Early initialization pass for hardware IPs.  The hardware IPs that make
1725 * up each asic are discovered and each IP's early_init callback is run.  This
1726 * is the first stage in initializing the asic.
1727 * Returns 0 on success, negative error code on failure.
1728 */
1729static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1730{
1731        int i, r;
1732
1733        amdgpu_device_enable_virtual_display(adev);
1734
1735        if (amdgpu_sriov_vf(adev)) {
1736                r = amdgpu_virt_request_full_gpu(adev, true);
1737                if (r)
1738                        return r;
1739        }
1740
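        /*
         * Map the ASIC to its family and let the corresponding soc-level
         * helper register the list of IP blocks (adev->ip_blocks) for it.
         */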
1741        switch (adev->asic_type) {
1742#ifdef CONFIG_DRM_AMDGPU_SI
1743        case CHIP_VERDE:
1744        case CHIP_TAHITI:
1745        case CHIP_PITCAIRN:
1746        case CHIP_OLAND:
1747        case CHIP_HAINAN:
1748                adev->family = AMDGPU_FAMILY_SI;
1749                r = si_set_ip_blocks(adev);
1750                if (r)
1751                        return r;
1752                break;
1753#endif
1754#ifdef CONFIG_DRM_AMDGPU_CIK
1755        case CHIP_BONAIRE:
1756        case CHIP_HAWAII:
1757        case CHIP_KAVERI:
1758        case CHIP_KABINI:
1759        case CHIP_MULLINS:
1760                if (adev->flags & AMD_IS_APU)
1761                        adev->family = AMDGPU_FAMILY_KV;
1762                else
1763                        adev->family = AMDGPU_FAMILY_CI;
1764
1765                r = cik_set_ip_blocks(adev);
1766                if (r)
1767                        return r;
1768                break;
1769#endif
1770        case CHIP_TOPAZ:
1771        case CHIP_TONGA:
1772        case CHIP_FIJI:
1773        case CHIP_POLARIS10:
1774        case CHIP_POLARIS11:
1775        case CHIP_POLARIS12:
1776        case CHIP_VEGAM:
1777        case CHIP_CARRIZO:
1778        case CHIP_STONEY:
1779                if (adev->flags & AMD_IS_APU)
1780                        adev->family = AMDGPU_FAMILY_CZ;
1781                else
1782                        adev->family = AMDGPU_FAMILY_VI;
1783
1784                r = vi_set_ip_blocks(adev);
1785                if (r)
1786                        return r;
1787                break;
1788        case CHIP_VEGA10:
1789        case CHIP_VEGA12:
1790        case CHIP_VEGA20:
1791        case CHIP_RAVEN:
1792        case CHIP_ARCTURUS:
1793        case CHIP_RENOIR:
1794                if (adev->flags & AMD_IS_APU)
1795                        adev->family = AMDGPU_FAMILY_RV;
1796                else
1797                        adev->family = AMDGPU_FAMILY_AI;
1798
1799                r = soc15_set_ip_blocks(adev);
1800                if (r)
1801                        return r;
1802                break;
1803        case  CHIP_NAVI10:
1804        case  CHIP_NAVI14:
1805        case  CHIP_NAVI12:
1806        case  CHIP_SIENNA_CICHLID:
1807        case  CHIP_NAVY_FLOUNDER:
1808                adev->family = AMDGPU_FAMILY_NV;
1809
1810                r = nv_set_ip_blocks(adev);
1811                if (r)
1812                        return r;
1813                break;
1814        default:
1815                /* FIXME: not supported yet */
1816                return -EINVAL;
1817        }
1818
1819        amdgpu_amdkfd_device_probe(adev);
1820
1821        adev->pm.pp_feature = amdgpu_pp_feature_mask;
1822        if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
1823                adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1824
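        /*
         * Run early_init for each IP block.  Blocks masked off via the
         * ip_block_mask module parameter, or whose early_init returns -ENOENT,
         * are marked invalid and skipped for the rest of the init sequence.
         */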
1825        for (i = 0; i < adev->num_ip_blocks; i++) {
1826                if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1827                        DRM_ERROR("disabled ip block: %d <%s>\n",
1828                                  i, adev->ip_blocks[i].version->funcs->name);
1829                        adev->ip_blocks[i].status.valid = false;
1830                } else {
1831                        if (adev->ip_blocks[i].version->funcs->early_init) {
1832                                r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1833                                if (r == -ENOENT) {
1834                                        adev->ip_blocks[i].status.valid = false;
1835                                } else if (r) {
1836                                        DRM_ERROR("early_init of IP block <%s> failed %d\n",
1837                                                  adev->ip_blocks[i].version->funcs->name, r);
1838                                        return r;
1839                                } else {
1840                                        adev->ip_blocks[i].status.valid = true;
1841                                }
1842                        } else {
1843                                adev->ip_blocks[i].status.valid = true;
1844                        }
1845                }
1846                /* get the vbios after the asic_funcs are set up */
1847                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1848                        r = amdgpu_device_parse_gpu_info_fw(adev);
1849                        if (r)
1850                                return r;
1851
1852                        /* Read BIOS */
1853                        if (!amdgpu_get_bios(adev))
1854                                return -EINVAL;
1855
1856                        r = amdgpu_atombios_init(adev);
1857                        if (r) {
1858                                dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1859                                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1860                                return r;
1861                        }
1862                }
1863        }
1864
1865        adev->cg_flags &= amdgpu_cg_mask;
1866        adev->pg_flags &= amdgpu_pg_mask;
1867
1868        return 0;
1869}
1870
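/*
 * Phase 1 only brings up the blocks everything else depends on: COMMON, IH
 * and, under SR-IOV, PSP.  The remaining blocks are initialized in phase 2,
 * after firmware loading has completed.
 */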
1871static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1872{
1873        int i, r;
1874
1875        for (i = 0; i < adev->num_ip_blocks; i++) {
1876                if (!adev->ip_blocks[i].status.sw)
1877                        continue;
1878                if (adev->ip_blocks[i].status.hw)
1879                        continue;
1880                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1881                    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1882                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1883                        r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1884                        if (r) {
1885                                DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1886                                          adev->ip_blocks[i].version->funcs->name, r);
1887                                return r;
1888                        }
1889                        adev->ip_blocks[i].status.hw = true;
1890                }
1891        }
1892
1893        return 0;
1894}
1895
1896static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1897{
1898        int i, r;
1899
1900        for (i = 0; i < adev->num_ip_blocks; i++) {
1901                if (!adev->ip_blocks[i].status.sw)
1902                        continue;
1903                if (adev->ip_blocks[i].status.hw)
1904                        continue;
1905                r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1906                if (r) {
1907                        DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1908                                  adev->ip_blocks[i].version->funcs->name, r);
1909                        return r;
1910                }
1911                adev->ip_blocks[i].status.hw = true;
1912        }
1913
1914        return 0;
1915}
1916
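/*
 * On VEGA10 and newer, bring up the PSP block (resume on the reset/suspend
 * paths, hw_init otherwise) so it can load firmware, then load the SMU
 * firmware via the powerplay layer where applicable.
 */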
1917static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1918{
1919        int r = 0;
1920        int i;
1921        uint32_t smu_version;
1922
1923        if (adev->asic_type >= CHIP_VEGA10) {
1924                for (i = 0; i < adev->num_ip_blocks; i++) {
1925                        if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1926                                continue;
1927
1928                        /* no need to do the fw loading again if already done */
1929                        if (adev->ip_blocks[i].status.hw == true)
1930                                break;
1931
1932                        if (adev->in_gpu_reset || adev->in_suspend) {
1933                                r = adev->ip_blocks[i].version->funcs->resume(adev);
1934                                if (r) {
1935                                        DRM_ERROR("resume of IP block <%s> failed %d\n",
1936                                                          adev->ip_blocks[i].version->funcs->name, r);
1937                                        return r;
1938                                }
1939                        } else {
1940                                r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1941                                if (r) {
1942                                        DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1943                                                          adev->ip_blocks[i].version->funcs->name, r);
1944                                        return r;
1945                                }
1946                        }
1947
1948                        adev->ip_blocks[i].status.hw = true;
1949                        break;
1950                }
1951        }
1952
1953        if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1954                r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1955
1956        return r;
1957}
1958
1959/**
1960 * amdgpu_device_ip_init - run init for hardware IPs
1961 *
1962 * @adev: amdgpu_device pointer
1963 *
1964 * Main initialization pass for hardware IPs.  The list of all the hardware
1965 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1966 * are run.  sw_init initializes the software state associated with each IP
1967 * and hw_init initializes the hardware associated with each IP.
1968 * Returns 0 on success, negative error code on failure.
1969 */
1970static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1971{
1972        int i, r;
1973
1974        r = amdgpu_ras_init(adev);
1975        if (r)
1976                return r;
1977
1978        for (i = 0; i < adev->num_ip_blocks; i++) {
1979                if (!adev->ip_blocks[i].status.valid)
1980                        continue;
1981                r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1982                if (r) {
1983                        DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1984                                  adev->ip_blocks[i].version->funcs->name, r);
1985                        goto init_failed;
1986                }
1987                adev->ip_blocks[i].status.sw = true;
1988
1989                /* need to do gmc hw init early so we can allocate gpu mem */
1990                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1991                        r = amdgpu_device_vram_scratch_init(adev);
1992                        if (r) {
1993                                DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
1994                                goto init_failed;
1995                        }
1996                        r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1997                        if (r) {
1998                                DRM_ERROR("hw_init %d failed %d\n", i, r);
1999                                goto init_failed;
2000                        }
2001                        r = amdgpu_device_wb_init(adev);
2002                        if (r) {
2003                                DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2004                                goto init_failed;
2005                        }
2006                        adev->ip_blocks[i].status.hw = true;
2007
2008                        /* right after GMC hw init, we create CSA */
2009                        if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
2010                                r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2011                                                                AMDGPU_GEM_DOMAIN_VRAM,
2012                                                                AMDGPU_CSA_SIZE);
2013                                if (r) {
2014                                        DRM_ERROR("allocate CSA failed %d\n", r);
2015                                        goto init_failed;
2016                                }
2017                        }
2018                }
2019        }
2020
2021        if (amdgpu_sriov_vf(adev))
2022                amdgpu_virt_init_data_exchange(adev);
2023
2024        r = amdgpu_ib_pool_init(adev);
2025        if (r) {
2026                dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2027                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2028                goto init_failed;
2029        }
2030
2031        r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
2032        if (r)
2033                goto init_failed;
2034
2035        r = amdgpu_device_ip_hw_init_phase1(adev);
2036        if (r)
2037                goto init_failed;
2038
2039        r = amdgpu_device_fw_loading(adev);
2040        if (r)
2041                goto init_failed;
2042
2043        r = amdgpu_device_ip_hw_init_phase2(adev);
2044        if (r)
2045                goto init_failed;
2046
2047        /*
2048         * Retired pages will be loaded from the EEPROM and reserved here.
2049         * This should be called after amdgpu_device_ip_hw_init_phase2 since,
2050         * for some ASICs, the RAS EEPROM code relies on the SMU being fully
2051         * functional for I2C communication, which is only true at this point.
2052         * recovery_init may fail, but it can free all resources allocated by
2053         * itself and its failure should not stop the amdgpu init process.
2054         *
2055         * Note: theoretically, this should be called before all VRAM allocations
2056         * so that retired pages are never handed out for use.
2057         */
2058        amdgpu_ras_recovery_init(adev);
2059
2060        if (adev->gmc.xgmi.num_physical_nodes > 1)
2061                amdgpu_xgmi_add_device(adev);
2062        amdgpu_amdkfd_device_init(adev);
2063
2064        amdgpu_fru_get_product_info(adev);
2065
2066init_failed:
2067        if (amdgpu_sriov_vf(adev))
2068                amdgpu_virt_release_full_gpu(adev, true);
2069
2070        return r;
2071}
2072
2073/**
2074 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2075 *
2076 * @adev: amdgpu_device pointer
2077 *
2078 * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2079 * this function before a GPU reset.  If the value is retained after a
2080 * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2081 */
2082static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2083{
2084        memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2085}
2086
2087/**
2088 * amdgpu_device_check_vram_lost - check if vram is valid
2089 *
2090 * @adev: amdgpu_device pointer
2091 *
2092 * Checks the reset magic value written to the gart pointer in VRAM.
2093 * The driver calls this after a GPU reset to see if the contents of
2094 * VRAM have been lost or not.
2095 * Returns true if VRAM is lost, false if not.
2096 */
2097static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2098{
2099        if (memcmp(adev->gart.ptr, adev->reset_magic,
2100                        AMDGPU_RESET_MAGIC_NUM))
2101                return true;
2102
2103        if (!adev->in_gpu_reset)
2104                return false;
2105
2106        /*
2107         * For all ASICs with baco/mode1 reset, the VRAM is
2108         * always assumed to be lost.
2109         */
2110        switch (amdgpu_asic_reset_method(adev)) {
2111        case AMD_RESET_METHOD_BACO:
2112        case AMD_RESET_METHOD_MODE1:
2113                return true;
2114        default:
2115                return false;
2116        }
2117}
2118
2119/**
2120 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2121 *
2122 * @adev: amdgpu_device pointer
2123 * @state: clockgating state (gate or ungate)
2124 *
2125 * The list of all the hardware IPs that make up the asic is walked and the
2126 * set_clockgating_state callbacks are run.
2127 * The late init pass enables clockgating for hardware IPs;
2128 * the fini or suspend pass disables it again.
2129 * Returns 0 on success, negative error code on failure.
2130 */
2131
2132static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2133                                                enum amd_clockgating_state state)
2134{
2135        int i, j, r;
2136
2137        if (amdgpu_emu_mode == 1)
2138                return 0;
2139
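        /* Gate in normal IP order; ungate in the reverse order. */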
2140        for (j = 0; j < adev->num_ip_blocks; j++) {
2141                i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2142                if (!adev->ip_blocks[i].status.late_initialized)
2143                        continue;
2144                /* skip CG for VCE/UVD, it's handled specially */
2145                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2146                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2147                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2148                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2149                    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2150                        /* enable clockgating to save power */
2151                        r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2152                                                                                     state);
2153                        if (r) {
2154                                DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2155                                          adev->ip_blocks[i].version->funcs->name, r);
2156                                return r;
2157                        }
2158                }
2159        }
2160
2161        return 0;
2162}
2163
2164static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
2165{
2166        int i, j, r;
2167
2168        if (amdgpu_emu_mode == 1)
2169                return 0;
2170
2171        for (j = 0; j < adev->num_ip_blocks; j++) {
2172                i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2173                if (!adev->ip_blocks[i].status.late_initialized)
2174                        continue;
2175                /* skip PG for VCE/UVD, it's handled specially */
2176                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2177                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2178                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2179                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2180                    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2181                        /* enable powergating to save power */
2182                        r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2183                                                                                        state);
2184                        if (r) {
2185                                DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2186                                          adev->ip_blocks[i].version->funcs->name, r);
2187                                return r;
2188                        }
2189                }
2190        }
2191        return 0;
2192}
2193
2194static int amdgpu_device_enable_mgpu_fan_boost(void)
2195{
2196        struct amdgpu_gpu_instance *gpu_ins;
2197        struct amdgpu_device *adev;
2198        int i, ret = 0;
2199
2200        mutex_lock(&mgpu_info.mutex);
2201
2202        /*
2203         * MGPU fan boost feature should be enabled
2204         * only when there are two or more dGPUs in
2205         * the system
2206         */
2207        if (mgpu_info.num_dgpu < 2)
2208                goto out;
2209
2210        for (i = 0; i < mgpu_info.num_dgpu; i++) {
2211                gpu_ins = &(mgpu_info.gpu_ins[i]);
2212                adev = gpu_ins->adev;
2213                if (!(adev->flags & AMD_IS_APU) &&
2214                    !gpu_ins->mgpu_fan_enabled &&
2215                    adev->powerplay.pp_funcs &&
2216                    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2217                        ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2218                        if (ret)
2219                                break;
2220
2221                        gpu_ins->mgpu_fan_enabled = 1;
2222                }
2223        }
2224
2225out:
2226        mutex_unlock(&mgpu_info.mutex);
2227
2228        return ret;
2229}
2230
2231/**
2232 * amdgpu_device_ip_late_init - run late init for hardware IPs
2233 *
2234 * @adev: amdgpu_device pointer
2235 *
2236 * Late initialization pass for hardware IPs.  The list of all the hardware
2237 * IPs that make up the asic is walked and the late_init callbacks are run.
2238 * late_init covers any special initialization that an IP requires
2239 * after all of the IPs have been initialized or something that needs to happen
2240 * late in the init process.
2241 * Returns 0 on success, negative error code on failure.
2242 */
2243static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2244{
2245        struct amdgpu_gpu_instance *gpu_instance;
2246        int i = 0, r;
2247
2248        for (i = 0; i < adev->num_ip_blocks; i++) {
2249                if (!adev->ip_blocks[i].status.hw)
2250                        continue;
2251                if (adev->ip_blocks[i].version->funcs->late_init) {
2252                        r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2253                        if (r) {
2254                                DRM_ERROR("late_init of IP block <%s> failed %d\n",
2255                                          adev->ip_blocks[i].version->funcs->name, r);
2256                                return r;
2257                        }
2258                }
2259                adev->ip_blocks[i].status.late_initialized = true;
2260        }
2261
2262        amdgpu_ras_set_error_query_ready(adev, true);
2263
2264        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2265        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2266
2267        amdgpu_device_fill_reset_magic(adev);
2268
2269        r = amdgpu_device_enable_mgpu_fan_boost();
2270        if (r)
2271                DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2272
2273
2274        if (adev->gmc.xgmi.num_physical_nodes > 1) {
2275                mutex_lock(&mgpu_info.mutex);
2276
2277                /*
2278                 * Reset device p-state to low as this was booted with high.
2279                 *
2280                 * This should be performed only after all devices from the same
2281                 * hive get initialized.
2282                 *
2283                 * However, the number of devices in a hive is not known in
2284                 * advance; it is counted one by one as the devices initialize.
2285                 *
2286                 * So, we wait until all XGMI interlinked devices have initialized.
2287                 * This may introduce some delay since those devices may come from
2288                 * different hives.  But that should be OK.
2289                 */
2290                if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2291                        for (i = 0; i < mgpu_info.num_gpu; i++) {
2292                                gpu_instance = &(mgpu_info.gpu_ins[i]);
2293                                if (gpu_instance->adev->flags & AMD_IS_APU)
2294                                        continue;
2295
2296                                r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2297                                                AMDGPU_XGMI_PSTATE_MIN);
2298                                if (r) {
2299                                        DRM_ERROR("pstate setting failed (%d).\n", r);
2300                                        break;
2301                                }
2302                        }
2303                }
2304
2305                mutex_unlock(&mgpu_info.mutex);
2306        }
2307
2308        return 0;
2309}
2310
2311/**
2312 * amdgpu_device_ip_fini - run fini for hardware IPs
2313 *
2314 * @adev: amdgpu_device pointer
2315 *
2316 * Main teardown pass for hardware IPs.  The list of all the hardware
2317 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2318 * are run.  hw_fini tears down the hardware associated with each IP
2319 * and sw_fini tears down any software state associated with each IP.
2320 * Returns 0 on success, negative error code on failure.
2321 */
2322static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2323{
2324        int i, r;
2325
2326        if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2327                amdgpu_virt_release_ras_err_handler_data(adev);
2328
2329        amdgpu_ras_pre_fini(adev);
2330
2331        if (adev->gmc.xgmi.num_physical_nodes > 1)
2332                amdgpu_xgmi_remove_device(adev);
2333
2334        amdgpu_amdkfd_device_fini(adev);
2335
2336        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2337        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2338
2339        /* need to disable SMC first */
2340        for (i = 0; i < adev->num_ip_blocks; i++) {
2341                if (!adev->ip_blocks[i].status.hw)
2342                        continue;
2343                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2344                        r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2345                        /* XXX handle errors */
2346                        if (r) {
2347                                DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2348                                          adev->ip_blocks[i].version->funcs->name, r);
2349                        }
2350                        adev->ip_blocks[i].status.hw = false;
2351                        break;
2352                }
2353        }
2354
2355        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2356                if (!adev->ip_blocks[i].status.hw)
2357                        continue;
2358
2359                r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2360                /* XXX handle errors */
2361                if (r) {
2362                        DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2363                                  adev->ip_blocks[i].version->funcs->name, r);
2364                }
2365
2366                adev->ip_blocks[i].status.hw = false;
2367        }
2368
2369
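        /*
         * Tear down software state in reverse order.  When the GMC block is
         * reached, the helper buffers created around GMC init (ucode BO,
         * static CSA, writeback, VRAM scratch, IB pool) are freed first.
         */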
2370        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2371                if (!adev->ip_blocks[i].status.sw)
2372                        continue;
2373
2374                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2375                        amdgpu_ucode_free_bo(adev);
2376                        amdgpu_free_static_csa(&adev->virt.csa_obj);
2377                        amdgpu_device_wb_fini(adev);
2378                        amdgpu_device_vram_scratch_fini(adev);
2379                        amdgpu_ib_pool_fini(adev);
2380                }
2381
2382                r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2383                /* XXX handle errors */
2384                if (r) {
2385                        DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2386                                  adev->ip_blocks[i].version->funcs->name, r);
2387                }
2388                adev->ip_blocks[i].status.sw = false;
2389                adev->ip_blocks[i].status.valid = false;
2390        }
2391
2392        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2393                if (!adev->ip_blocks[i].status.late_initialized)
2394                        continue;
2395                if (adev->ip_blocks[i].version->funcs->late_fini)
2396                        adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2397                adev->ip_blocks[i].status.late_initialized = false;
2398        }
2399
2400        amdgpu_ras_fini(adev);
2401
2402        if (amdgpu_sriov_vf(adev))
2403                if (amdgpu_virt_release_full_gpu(adev, false))
2404                        DRM_ERROR("failed to release exclusive mode on fini\n");
2405
2406        return 0;
2407}
2408
2409/**
2410 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2411 *
2412 * @work: work_struct.
2413 */
2414static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2415{
2416        struct amdgpu_device *adev =
2417                container_of(work, struct amdgpu_device, delayed_init_work.work);
2418        int r;
2419
2420        r = amdgpu_ib_ring_tests(adev);
2421        if (r)
2422                DRM_ERROR("ib ring test failed (%d).\n", r);
2423}
2424
2425static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2426{
2427        struct amdgpu_device *adev =
2428                container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2429
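        /*
         * Only enter GFXOFF when no block still holds a request against it
         * (gfx_off_req_count == 0) and it is not already enabled.
         */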
2430        mutex_lock(&adev->gfx.gfx_off_mutex);
2431        if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2432                if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2433                        adev->gfx.gfx_off_state = true;
2434        }
2435        mutex_unlock(&adev->gfx.gfx_off_mutex);
2436}
2437
2438/**
2439 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2440 *
2441 * @adev: amdgpu_device pointer
2442 *
2443 * Main suspend function for hardware IPs.  The list of all the hardware
2444 * IPs that make up the asic is walked, clockgating is disabled and the
2445 * suspend callbacks are run.  suspend puts the hardware and software state
2446 * in each IP into a state suitable for suspend.
2447 * Returns 0 on success, negative error code on failure.
2448 */
2449static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2450{
2451        int i, r;
2452
2453        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2454        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2455
2456        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2457                if (!adev->ip_blocks[i].status.valid)
2458                        continue;
2459
2460                /* displays are handled separately */
2461                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2462                        continue;
2463
2465                r = adev->ip_blocks[i].version->funcs->suspend(adev);
2466                /* XXX handle errors */
2467                if (r) {
2468                        DRM_ERROR("suspend of IP block <%s> failed %d\n",
2469                                  adev->ip_blocks[i].version->funcs->name, r);
2470                        return r;
2471                }
2472
2473                adev->ip_blocks[i].status.hw = false;
2474        }
2475
2476        return 0;
2477}
2478
2479/**
2480 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2481 *
2482 * @adev: amdgpu_device pointer
2483 *
2484 * Main suspend function for hardware IPs.  The list of all the hardware
2485 * IPs that make up the asic is walked, clockgating is disabled and the
2486 * suspend callbacks are run.  suspend puts the hardware and software state
2487 * in each IP into a state suitable for suspend.
2488 * Returns 0 on success, negative error code on failure.
2489 */
2490static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2491{
2492        int i, r;
2493
2494        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2495                if (!adev->ip_blocks[i].status.valid)
2496                        continue;
2497                /* displays are handled in phase1 */
2498                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2499                        continue;
2500                /* PSP lost connection when err_event_athub occurs */
2501                if (amdgpu_ras_intr_triggered() &&
2502                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2503                        adev->ip_blocks[i].status.hw = false;
2504                        continue;
2505                }
2506                /* XXX handle errors */
2507                r = adev->ip_blocks[i].version->funcs->suspend(adev);
2508                /* XXX handle errors */
2509                if (r) {
2510                        DRM_ERROR("suspend of IP block <%s> failed %d\n",
2511                                  adev->ip_blocks[i].version->funcs->name, r);
2512                }
2513                adev->ip_blocks[i].status.hw = false;
2514                /* handle putting the SMC in the appropriate state */
2515                if (!amdgpu_sriov_vf(adev)) {
2516                        if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2517                                r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2518                                if (r) {
2519                                        DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2520                                                        adev->mp1_state, r);
2521                                        return r;
2522                                }
2523                        }
2524                }
2525                adev->ip_blocks[i].status.hw = false;
2526        }
2527
2528        return 0;
2529}
2530
2531/**
2532 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2533 *
2534 * @adev: amdgpu_device pointer
2535 *
2536 * Main suspend function for hardware IPs.  The list of all the hardware
2537 * IPs that make up the asic is walked, clockgating is disabled and the
2538 * suspend callbacks are run.  suspend puts the hardware and software state
2539 * in each IP into a state suitable for suspend.
2540 * Returns 0 on success, negative error code on failure.
2541 */
2542int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2543{
2544        int r;
2545
2546        if (amdgpu_sriov_vf(adev))
2547                amdgpu_virt_request_full_gpu(adev, false);
2548
2549        r = amdgpu_device_ip_suspend_phase1(adev);
2550        if (r)
2551                return r;
2552        r = amdgpu_device_ip_suspend_phase2(adev);
2553
2554        if (amdgpu_sriov_vf(adev))
2555                amdgpu_virt_release_full_gpu(adev, false);
2556
2557        return r;
2558}
2559
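/*
 * During an SR-IOV reset the hardware blocks are re-initialized in a fixed
 * order rather than in ip_blocks[] order: GMC, COMMON, PSP and IH first
 * (early), then SMC, DCE, GFX, SDMA, UVD, VCE and VCN (late).
 */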
2560static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2561{
2562        int i, r;
2563
2564        static enum amd_ip_block_type ip_order[] = {
2565                AMD_IP_BLOCK_TYPE_GMC,
2566                AMD_IP_BLOCK_TYPE_COMMON,
2567                AMD_IP_BLOCK_TYPE_PSP,
2568                AMD_IP_BLOCK_TYPE_IH,
2569        };
2570
2571        for (i = 0; i < adev->num_ip_blocks; i++)
2572                adev->ip_blocks[i].status.hw = false;
2573
2574        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2575                int j;
2576                struct amdgpu_ip_block *block;
2577
2578                for (j = 0; j < adev->num_ip_blocks; j++) {
2579                        block = &adev->ip_blocks[j];
2580
2581                        if (block->version->type != ip_order[i] ||
2582                                !block->status.valid)
2583                                continue;
2584
2585                        r = block->version->funcs->hw_init(adev);
2586                        DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2587                        if (r)
2588                                return r;
2589                        block->status.hw = true;
2590                }
2591        }
2592
2593        return 0;
2594}
2595
2596static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2597{
2598        int i, r;
2599
2600        static enum amd_ip_block_type ip_order[] = {
2601                AMD_IP_BLOCK_TYPE_SMC,
2602                AMD_IP_BLOCK_TYPE_DCE,
2603                AMD_IP_BLOCK_TYPE_GFX,
2604                AMD_IP_BLOCK_TYPE_SDMA,
2605                AMD_IP_BLOCK_TYPE_UVD,
2606                AMD_IP_BLOCK_TYPE_VCE,
2607                AMD_IP_BLOCK_TYPE_VCN
2608        };
2609
2610        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2611                int j;
2612                struct amdgpu_ip_block *block;
2613
2614                for (j = 0; j < adev->num_ip_blocks; j++) {
2615                        block = &adev->ip_blocks[j];
2616
2617                        if (block->version->type != ip_order[i] ||
2618                                !block->status.valid ||
2619                                block->status.hw)
2620                                continue;
2621
2622                        if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2623                                r = block->version->funcs->resume(adev);
2624                        else
2625                                r = block->version->funcs->hw_init(adev);
2626
2627                        DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2628                        if (r)
2629                                return r;
2630                        block->status.hw = true;
2631                }
2632        }
2633
2634        return 0;
2635}
2636
2637/**
2638 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2639 *
2640 * @adev: amdgpu_device pointer
2641 *
2642 * First resume function for hardware IPs.  The list of all the hardware
2643 * IPs that make up the asic is walked and the resume callbacks are run for
2644 * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2645 * after a suspend and updates the software state as necessary.  This
2646 * function is also used for restoring the GPU after a GPU reset.
2647 * Returns 0 on success, negative error code on failure.
2648 */
2649static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2650{
2651        int i, r;
2652
2653        for (i = 0; i < adev->num_ip_blocks; i++) {
2654                if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2655                        continue;
2656                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2657                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2658                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2659
2660                        r = adev->ip_blocks[i].version->funcs->resume(adev);
2661                        if (r) {
2662                                DRM_ERROR("resume of IP block <%s> failed %d\n",
2663                                          adev->ip_blocks[i].version->funcs->name, r);
2664                                return r;
2665                        }
2666                        adev->ip_blocks[i].status.hw = true;
2667                }
2668        }
2669
2670        return 0;
2671}
2672
2673/**
2674 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2675 *
2676 * @adev: amdgpu_device pointer
2677 *
2678 * Second resume function for hardware IPs.  The list of all the hardware
2679 * IPs that make up the asic is walked and the resume callbacks are run for
2680 * all blocks except COMMON, GMC, IH, and PSP.  resume puts the hardware into a
2681 * functional state after a suspend and updates the software state as
2682 * necessary.  This function is also used for restoring the GPU after a GPU
2683 * reset.
2684 * Returns 0 on success, negative error code on failure.
2685 */
2686static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2687{
2688        int i, r;
2689
2690        for (i = 0; i < adev->num_ip_blocks; i++) {
2691                if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2692                        continue;
2693                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2694                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2695                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2696                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2697                        continue;
2698                r = adev->ip_blocks[i].version->funcs->resume(adev);
2699                if (r) {
2700                        DRM_ERROR("resume of IP block <%s> failed %d\n",
2701                                  adev->ip_blocks[i].version->funcs->name, r);
2702                        return r;
2703                }
2704                adev->ip_blocks[i].status.hw = true;
2705        }
2706
2707        return 0;
2708}
2709
2710/**
2711 * amdgpu_device_ip_resume - run resume for hardware IPs
2712 *
2713 * @adev: amdgpu_device pointer
2714 *
2715 * Main resume function for hardware IPs.  The hardware IPs
2716 * are split into two resume functions because they are
2717 * also used in recovering from a GPU reset and some additional
2718 * steps need to be taken between them.  In this case (S3/S4) they are
2719 * run sequentially.
2720 * Returns 0 on success, negative error code on failure.
2721 */
2722static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2723{
2724        int r;
2725
2726        r = amdgpu_device_ip_resume_phase1(adev);
2727        if (r)
2728                return r;
2729
2730        r = amdgpu_device_fw_loading(adev);
2731        if (r)
2732                return r;
2733
2734        r = amdgpu_device_ip_resume_phase2(adev);
2735
2736        return r;
2737}
2738
2739/**
2740 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2741 *
2742 * @adev: amdgpu_device pointer
2743 *
2744 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2745 */
2746static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2747{
2748        if (amdgpu_sriov_vf(adev)) {
2749                if (adev->is_atom_fw) {
2750                        if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2751                                adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2752                } else {
2753                        if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2754                                adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2755                }
2756
2757                if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2758                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2759        }
2760}
2761
2762/**
2763 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2764 *
2765 * @asic_type: AMD asic type
2766 *
2767 * Check if there is DC (new modesetting infrastructure) support for an asic.
2768 * returns true if DC has support, false if not.
2769 */
2770bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2771{
2772        switch (asic_type) {
2773#if defined(CONFIG_DRM_AMD_DC)
2774        case CHIP_BONAIRE:
2775        case CHIP_KAVERI:
2776        case CHIP_KABINI:
2777        case CHIP_MULLINS:
2778                /*
2779                 * We have systems in the wild with these ASICs that require
2780                 * LVDS and VGA support which is not supported with DC.
2781                 *
2782                 * Fallback to the non-DC driver here by default so as not to
2783                 * cause regressions.
2784                 */
2785                return amdgpu_dc > 0;
2786        case CHIP_HAWAII:
2787        case CHIP_CARRIZO:
2788        case CHIP_STONEY:
2789        case CHIP_POLARIS10:
2790        case CHIP_POLARIS11:
2791        case CHIP_POLARIS12:
2792        case CHIP_VEGAM:
2793        case CHIP_TONGA:
2794        case CHIP_FIJI:
2795        case CHIP_VEGA10:
2796        case CHIP_VEGA12:
2797        case CHIP_VEGA20:
2798#if defined(CONFIG_DRM_AMD_DC_DCN)
2799        case CHIP_RAVEN:
2800        case CHIP_NAVI10:
2801        case CHIP_NAVI14:
2802        case CHIP_NAVI12:
2803        case CHIP_RENOIR:
2804#endif
2805#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
2806        case CHIP_SIENNA_CICHLID:
2807        case CHIP_NAVY_FLOUNDER:
2808#endif
2809                return amdgpu_dc != 0;
2810#endif
2811        default:
2812                if (amdgpu_dc > 0)
2813                        DRM_INFO("Display Core has been requested via kernel parameter "
2814                                         "but isn't supported by ASIC, ignoring\n");
2815                return false;
2816        }
2817}
2818
2819/**
2820 * amdgpu_device_has_dc_support - check if dc is supported
2821 *
2822 * @adev: amdgpu_device pointer
2823 *
2824 * Returns true for supported, false for not supported
2825 */
2826bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2827{
2828        if (amdgpu_sriov_vf(adev))
2829                return false;
2830
2831        return amdgpu_device_asic_has_dc_support(adev->asic_type);
2832}
2833
2834
2835static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2836{
2837        struct amdgpu_device *adev =
2838                container_of(__work, struct amdgpu_device, xgmi_reset_work);
2839        struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
2840
2841        /* It's a bug to not have a hive within this function */
2842        if (WARN_ON(!hive))
2843                return;
2844
2845        /*
2846         * Use task barrier to synchronize all xgmi reset works across the
2847         * hive. task_barrier_enter and task_barrier_exit will block
2848         * until all the threads running the xgmi reset works reach
2849         * those points. task_barrier_full will do both blocks.
2850         */
2851        if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2852
2853                task_barrier_enter(&hive->tb);
2854                adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2855
2856                if (adev->asic_reset_res)
2857                        goto fail;
2858
2859                task_barrier_exit(&hive->tb);
2860                adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2861
2862                if (adev->asic_reset_res)
2863                        goto fail;
2864
2865                if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2866                        adev->mmhub.funcs->reset_ras_error_count(adev);
2867        } else {
2868
2869                task_barrier_full(&hive->tb);
2870                adev->asic_reset_res =  amdgpu_asic_reset(adev);
2871        }
2872
2873fail:
2874        if (adev->asic_reset_res)
2875                DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2876                         adev->asic_reset_res, adev->ddev->unique);
2877}
2878
2879static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2880{
2881        char *input = amdgpu_lockup_timeout;
2882        char *timeout_setting = NULL;
2883        int index = 0;
2884        long timeout;
2885        int ret = 0;
2886
2887        /*
2888         * By default the timeout for non-compute jobs is 10000 ms
2889         * and no timeout is enforced on compute jobs.
2890         * In SR-IOV or passthrough mode, the default timeout for
2891         * compute jobs is 60000 ms.
2892         */
2893        adev->gfx_timeout = msecs_to_jiffies(10000);
2894        adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2895        if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2896                adev->compute_timeout =  msecs_to_jiffies(60000);
2897        else
2898                adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2899
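        /*
         * lockup_timeout is a comma separated list of up to four values,
         * applied in the order gfx, compute, sdma, video.  A value of 0
         * keeps the default and a negative value disables the timeout.
         */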
2900        if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2901                while ((timeout_setting = strsep(&input, ",")) &&
2902                                strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2903                        ret = kstrtol(timeout_setting, 0, &timeout);
2904                        if (ret)
2905                                return ret;
2906
2907                        if (timeout == 0) {
2908                                index++;
2909                                continue;
2910                        } else if (timeout < 0) {
2911                                timeout = MAX_SCHEDULE_TIMEOUT;
2912                        } else {
2913                                timeout = msecs_to_jiffies(timeout);
2914                        }
2915
2916                        switch (index++) {
2917                        case 0:
2918                                adev->gfx_timeout = timeout;
2919                                break;
2920                        case 1:
2921                                adev->compute_timeout = timeout;
2922                                break;
2923                        case 2:
2924                                adev->sdma_timeout = timeout;
2925                                break;
2926                        case 3:
2927                                adev->video_timeout = timeout;
2928                                break;
2929                        default:
2930                                break;
2931                        }
2932                }
2933                /*
2934                 * There is only one value specified and
2935                 * it should apply to all non-compute jobs.
2936                 */
2937                if (index == 1) {
2938                        adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2939                        if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2940                                adev->compute_timeout = adev->gfx_timeout;
2941                }
2942        }
2943
2944        return ret;
2945}
2946
2947static const struct attribute *amdgpu_dev_attributes[] = {
2948        &dev_attr_product_name.attr,
2949        &dev_attr_product_number.attr,
2950        &dev_attr_serial_number.attr,
2951        &dev_attr_pcie_replay_count.attr,
2952        NULL
2953};
2954
2955/**
2956 * amdgpu_device_init - initialize the driver
2957 *
2958 * @adev: amdgpu_device pointer
2959 * @ddev: drm dev pointer
2960 * @pdev: pci dev pointer
2961 * @flags: driver flags
2962 *
2963 * Initializes the driver info and hw (all asics).
2964 * Returns 0 for success or an error on failure.
2965 * Called at driver startup.
2966 */
2967int amdgpu_device_init(struct amdgpu_device *adev,
2968                       struct drm_device *ddev,
2969                       struct pci_dev *pdev,
2970                       uint32_t flags)
2971{
2972        int r, i;
2973        bool boco = false;
2974        u32 max_MBps;
2975
2976        adev->shutdown = false;
2977        adev->dev = &pdev->dev;
2978        adev->ddev = ddev;
2979        adev->pdev = pdev;
2980        adev->flags = flags;
2981
2982        if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2983                adev->asic_type = amdgpu_force_asic_type;
2984        else
2985                adev->asic_type = flags & AMD_ASIC_MASK;
2986
2987        adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2988        if (amdgpu_emu_mode == 1)
2989                adev->usec_timeout *= 10;
2990        adev->gmc.gart_size = 512 * 1024 * 1024;
2991        adev->accel_working = false;
2992        adev->num_rings = 0;
2993        adev->mman.buffer_funcs = NULL;
2994        adev->mman.buffer_funcs_ring = NULL;
2995        adev->vm_manager.vm_pte_funcs = NULL;
2996        adev->vm_manager.vm_pte_num_scheds = 0;
2997        adev->gmc.gmc_funcs = NULL;
2998        adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2999        bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3000
3001        adev->smc_rreg = &amdgpu_invalid_rreg;
3002        adev->smc_wreg = &amdgpu_invalid_wreg;
3003        adev->pcie_rreg = &amdgpu_invalid_rreg;
3004        adev->pcie_wreg = &amdgpu_invalid_wreg;
3005        adev->pciep_rreg = &amdgpu_invalid_rreg;
3006        adev->pciep_wreg = &amdgpu_invalid_wreg;
3007        adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3008        adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3009        adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3010        adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3011        adev->didt_rreg = &amdgpu_invalid_rreg;
3012        adev->didt_wreg = &amdgpu_invalid_wreg;
3013        adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3014        adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3015        adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3016        adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3017
3018        DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3019                 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3020                 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3021
3022        /* mutex initializations are all done here so we
3023         * can recall functions without locking issues */
3024        atomic_set(&adev->irq.ih.lock, 0);
3025        mutex_init(&adev->firmware.mutex);
3026        mutex_init(&adev->pm.mutex);
3027        mutex_init(&adev->gfx.gpu_clock_mutex);
3028        mutex_init(&adev->srbm_mutex);
3029        mutex_init(&adev->gfx.pipe_reserve_mutex);
3030        mutex_init(&adev->gfx.gfx_off_mutex);
3031        mutex_init(&adev->grbm_idx_mutex);
3032        mutex_init(&adev->mn_lock);
3033        mutex_init(&adev->virt.vf_errors.lock);
3034        hash_init(adev->mn_hash);
3035        mutex_init(&adev->lock_reset);
3036        mutex_init(&adev->psp.mutex);
3037        mutex_init(&adev->notifier_lock);
3038
3039        r = amdgpu_device_check_arguments(adev);
3040        if (r)
3041                return r;
3042
3043        spin_lock_init(&adev->mmio_idx_lock);
3044        spin_lock_init(&adev->smc_idx_lock);
3045        spin_lock_init(&adev->pcie_idx_lock);
3046        spin_lock_init(&adev->uvd_ctx_idx_lock);
3047        spin_lock_init(&adev->didt_idx_lock);
3048        spin_lock_init(&adev->gc_cac_idx_lock);
3049        spin_lock_init(&adev->se_cac_idx_lock);
3050        spin_lock_init(&adev->audio_endpt_idx_lock);
3051        spin_lock_init(&adev->mm_stats.lock);
3052
3053        INIT_LIST_HEAD(&adev->shadow_list);
3054        mutex_init(&adev->shadow_list_lock);
3055
3056        INIT_DELAYED_WORK(&adev->delayed_init_work,
3057                          amdgpu_device_delayed_init_work_handler);
3058        INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3059                          amdgpu_device_delay_enable_gfx_off);
3060
3061        INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3062
3063        adev->gfx.gfx_off_req_count = 1;
3064        adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3065
3066        atomic_set(&adev->throttling_logging_enabled, 1);
3067        /*
3068         * If throttling continues, logging will be performed every minute
3069         * to avoid log flooding. "-1" is subtracted since the thermal
3070         * throttling interrupt comes every second. Thus, the total logging
3071         * interval is 59 seconds (ratelimited printk interval) + 1 second
3072         * (waiting for the throttling interrupt) = 60 seconds.
3073         */
3074        ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3075        ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3076
3077        /* Registers mapping */
3078        /* TODO: block userspace mapping of io register */
3079        if (adev->asic_type >= CHIP_BONAIRE) {
3080                adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3081                adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3082        } else {
3083                adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3084                adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3085        }
3086
3087        adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3088        if (adev->rmmio == NULL) {
3089                return -ENOMEM;
3090        }
3091        DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3092        DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3093
3094        /* io port mapping */
3095        for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3096                if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3097                        adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3098                        adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3099                        break;
3100                }
3101        }
3102        if (adev->rio_mem == NULL)
3103                DRM_INFO("PCI I/O BAR is not found.\n");
3104
3105        /* enable PCIE atomic ops */
3106        r = pci_enable_atomic_ops_to_root(adev->pdev,
3107                                          PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3108                                          PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3109        if (r) {
3110                adev->have_atomics_support = false;
3111                DRM_INFO("PCIE atomic ops is not supported\n");
3112        } else {
3113                adev->have_atomics_support = true;
3114        }
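        /*
         * have_atomics_support is later consulted by consumers such as
         * amdkfd, which on some ASICs needs PCIe atomics for its user-mode
         * queues; missing support is therefore informational here rather
         * than fatal.
         */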
3115
3116        amdgpu_device_get_pcie_info(adev);
3117
3118        if (amdgpu_mcbp)
3119                DRM_INFO("MCBP is enabled\n");
3120
3121        if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3122                adev->enable_mes = true;
3123
3124        /* detect hw virtualization here */
3125        amdgpu_detect_virtualization(adev);
3126
3127        r = amdgpu_device_get_job_timeout_settings(adev);
3128        if (r) {
3129                dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3130                return r;
3131        }
3132
3133        /* early init functions */
3134        r = amdgpu_device_ip_early_init(adev);
3135        if (r)
3136                return r;
3137
3138        /* doorbell bar mapping and doorbell index init */
3139        amdgpu_device_doorbell_init(adev);
3140
3141        /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3142        /* this will fail for cards that aren't VGA class devices, just
3143         * ignore it */
3144        vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3145
3146        if (amdgpu_device_supports_boco(ddev))
3147                boco = true;
3148        if (amdgpu_has_atpx() &&
3149            (amdgpu_is_atpx_hybrid() ||
3150             amdgpu_has_atpx_dgpu_power_cntl()) &&
3151            !pci_is_thunderbolt_attached(adev->pdev))
3152                vga_switcheroo_register_client(adev->pdev,
3153                                               &amdgpu_switcheroo_ops, boco);
3154        if (boco)
3155                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3156
3157        if (amdgpu_emu_mode == 1) {
3158                /* post the asic in emulation mode */
3159                emu_soc_asic_init(adev);
3160                goto fence_driver_init;
3161        }
3162
3163        /* detect whether we are running with an SR-IOV vbios */
3164        amdgpu_device_detect_sriov_bios(adev);
3165
3166        /* check if we need to reset the asic
3167         *  E.g., driver was not cleanly unloaded previously, etc.
3168         */
3169        if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3170                r = amdgpu_asic_reset(adev);
3171                if (r) {
3172                        dev_err(adev->dev, "asic reset on init failed\n");
3173                        goto failed;
3174                }
3175        }
3176
3177        /* Post card if necessary */
3178        if (amdgpu_device_need_post(adev)) {
3179                if (!adev->bios) {
3180                        dev_err(adev->dev, "no vBIOS found\n");
3181                        r = -EINVAL;
3182                        goto failed;
3183                }
3184                DRM_INFO("GPU posting now...\n");
3185                r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3186                if (r) {
3187                        dev_err(adev->dev, "gpu post error!\n");
3188                        goto failed;
3189                }
3190        }
3191
3192        if (adev->is_atom_fw) {
3193                /* Initialize clocks */
3194                r = amdgpu_atomfirmware_get_clock_info(adev);
3195                if (r) {
3196                        dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3197                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3198                        goto failed;
3199                }
3200        } else {
3201                /* Initialize clocks */
3202                r = amdgpu_atombios_get_clock_info(adev);
3203                if (r) {
3204                        dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3205                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3206                        goto failed;
3207                }
3208                /* init i2c buses */
3209                if (!amdgpu_device_has_dc_support(adev))
3210                        amdgpu_atombios_i2c_init(adev);
3211        }
3212
3213fence_driver_init:
3214        /* Fence driver */
3215        r = amdgpu_fence_driver_init(adev);
3216        if (r) {
3217                dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
3218                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3219                goto failed;
3220        }
3221
3222        /* init the mode config */
3223        drm_mode_config_init(adev->ddev);
3224
3225        r = amdgpu_device_ip_init(adev);
3226        if (r) {
3227                /* failed in exclusive mode due to timeout */
3228                if (amdgpu_sriov_vf(adev) &&
3229                    !amdgpu_sriov_runtime(adev) &&
3230                    amdgpu_virt_mmio_blocked(adev) &&
3231                    !amdgpu_virt_wait_reset(adev)) {
3232                        dev_err(adev->dev, "VF exclusive mode timeout\n");
3233                        /* Don't send request since VF is inactive. */
3234                        adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3235                        adev->virt.ops = NULL;
3236                        r = -EAGAIN;
3237                        goto failed;
3238                }
3239                dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3240                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3241                goto failed;
3242        }
3243
3244        dev_info(adev->dev,
3245                "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3246                        adev->gfx.config.max_shader_engines,
3247                        adev->gfx.config.max_sh_per_se,
3248                        adev->gfx.config.max_cu_per_sh,
3249                        adev->gfx.cu_info.number);
3250
3251        adev->accel_working = true;
3252
3253        amdgpu_vm_check_compute_bug(adev);
3254
3255        /* Initialize the buffer migration limit. */
3256        if (amdgpu_moverate >= 0)
3257                max_MBps = amdgpu_moverate;
3258        else
3259                max_MBps = 8; /* Allow 8 MB/s. */
3260        /* Get a log2 for easy divisions. */
3261        adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3262
3263        amdgpu_fbdev_init(adev);
3264
3265        r = amdgpu_pm_sysfs_init(adev);
3266        if (r) {
3267                adev->pm_sysfs_en = false;
3268                DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3269        } else
3270                adev->pm_sysfs_en = true;
3271
3272        r = amdgpu_ucode_sysfs_init(adev);
3273        if (r) {
3274                adev->ucode_sysfs_en = false;
3275                DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3276        } else
3277                adev->ucode_sysfs_en = true;
3278
3279        if ((amdgpu_testing & 1)) {
3280                if (adev->accel_working)
3281                        amdgpu_test_moves(adev);
3282                else
3283                        DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3284        }
3285        if (amdgpu_benchmarking) {
3286                if (adev->accel_working)
3287                        amdgpu_benchmark(adev, amdgpu_benchmarking);
3288                else
3289                        DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3290        }
3291
3292        /*
3293         * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3294         * Otherwise the mgpu fan boost feature will be skipped because the
3295         * gpu instance count would be too low.
3296         */
3297        amdgpu_register_gpu_instance(adev);
3298
3299        /* enable clockgating, etc. after ib tests, etc. since some blocks require
3300         * explicit gating rather than handling it automatically.
3301         */
3302        r = amdgpu_device_ip_late_init(adev);
3303        if (r) {
3304                dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3305                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3306                goto failed;
3307        }
3308
3309        /* must succeed. */
3310        amdgpu_ras_resume(adev);
3311
3312        queue_delayed_work(system_wq, &adev->delayed_init_work,
3313                           msecs_to_jiffies(AMDGPU_RESUME_MS));
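        /*
         * The delayed init work primarily runs the deferred IB ring tests
         * (see amdgpu_device_delayed_init_work_handler); for SR-IOV it is
         * flushed synchronously right below.
         */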
3314
3315        if (amdgpu_sriov_vf(adev))
3316                flush_delayed_work(&adev->delayed_init_work);
3317
3318        r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3319        if (r) {
3320                dev_err(adev->dev, "Could not create amdgpu device attr\n");
3321                return r;
3322        }
3323
3324        if (IS_ENABLED(CONFIG_PERF_EVENTS))
3325                r = amdgpu_pmu_init(adev);
3326        if (r)
3327                dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3328
3329        return 0;
3330
3331failed:
3332        amdgpu_vf_error_trans_all(adev);
3333        if (boco)
3334                vga_switcheroo_fini_domain_pm_ops(adev->dev);
3335
3336        return r;
3337}
3338
3339/**
3340 * amdgpu_device_fini - tear down the driver
3341 *
3342 * @adev: amdgpu_device pointer
3343 *
3344 * Tear down the driver info (all asics).
3345 * Called at driver shutdown.
3346 */
3347void amdgpu_device_fini(struct amdgpu_device *adev)
3348{
3349        int r;
3350
3351        DRM_INFO("amdgpu: finishing device.\n");
3352        flush_delayed_work(&adev->delayed_init_work);
3353        adev->shutdown = true;
3354
3355        /* make sure the IB tests have finished before entering exclusive
3356         * mode to avoid preemption during the IB tests
3357         */
3358        if (amdgpu_sriov_vf(adev))
3359                amdgpu_virt_request_full_gpu(adev, false);
3360
3361        /* disable all interrupts */
3362        amdgpu_irq_disable_all(adev);
3363        if (adev->mode_info.mode_config_initialized) {
3364                if (!amdgpu_device_has_dc_support(adev))
3365                        drm_helper_force_disable_all(adev->ddev);
3366                else
3367                        drm_atomic_helper_shutdown(adev->ddev);
3368        }
3369        amdgpu_fence_driver_fini(adev);
3370        if (adev->pm_sysfs_en)
3371                amdgpu_pm_sysfs_fini(adev);
3372        amdgpu_fbdev_fini(adev);
3373        r = amdgpu_device_ip_fini(adev);
3374        release_firmware(adev->firmware.gpu_info_fw);
3375        adev->firmware.gpu_info_fw = NULL;
3376        adev->accel_working = false;
3377        /* free i2c buses */
3378        if (!amdgpu_device_has_dc_support(adev))
3379                amdgpu_i2c_fini(adev);
3380
3381        if (amdgpu_emu_mode != 1)
3382                amdgpu_atombios_fini(adev);
3383
3384        kfree(adev->bios);
3385        adev->bios = NULL;
3386        if (amdgpu_has_atpx() &&
3387            (amdgpu_is_atpx_hybrid() ||
3388             amdgpu_has_atpx_dgpu_power_cntl()) &&
3389            !pci_is_thunderbolt_attached(adev->pdev))
3390                vga_switcheroo_unregister_client(adev->pdev);
3391        if (amdgpu_device_supports_boco(adev->ddev))
3392                vga_switcheroo_fini_domain_pm_ops(adev->dev);
3393        vga_client_register(adev->pdev, NULL, NULL, NULL);
3394        if (adev->rio_mem)
3395                pci_iounmap(adev->pdev, adev->rio_mem);
3396        adev->rio_mem = NULL;
3397        iounmap(adev->rmmio);
3398        adev->rmmio = NULL;
3399        amdgpu_device_doorbell_fini(adev);
3400
3401        if (adev->ucode_sysfs_en)
3402                amdgpu_ucode_sysfs_fini(adev);
3403
3404        sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3405        if (IS_ENABLED(CONFIG_PERF_EVENTS))
3406                amdgpu_pmu_fini(adev);
3407        if (adev->discovery_bin)
3408                amdgpu_discovery_fini(adev);
3409}
3410
3411
3412/*
3413 * Suspend & resume.
3414 */
3415/**
3416 * amdgpu_device_suspend - initiate device suspend
3417 *
3418 * @dev: drm dev pointer
3419 * @fbcon: notify the fbdev of suspend
3420 *
3421 * Puts the hw in the suspend state (all asics).
3422 * Returns 0 for success or an error on failure.
3423 * Called at driver suspend.
3424 */
3425int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3426{
3427        struct amdgpu_device *adev;
3428        struct drm_crtc *crtc;
3429        struct drm_connector *connector;
3430        struct drm_connector_list_iter iter;
3431        int r;
3432
3433        if (dev == NULL || dev->dev_private == NULL) {
3434                return -ENODEV;
3435        }
3436
3437        adev = dev->dev_private;
3438
3439        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3440                return 0;
3441
3442        adev->in_suspend = true;
3443        drm_kms_helper_poll_disable(dev);
3444
3445        if (fbcon)
3446                amdgpu_fbdev_set_suspend(adev, 1);
3447
3448        cancel_delayed_work_sync(&adev->delayed_init_work);
3449
3450        if (!amdgpu_device_has_dc_support(adev)) {
3451                /* turn off display hw */
3452                drm_modeset_lock_all(dev);
3453                drm_connector_list_iter_begin(dev, &iter);
3454                drm_for_each_connector_iter(connector, &iter)
3455                        drm_helper_connector_dpms(connector,
3456                                                  DRM_MODE_DPMS_OFF);
3457                drm_connector_list_iter_end(&iter);
3458                drm_modeset_unlock_all(dev);
3459                /* unpin the front buffers and cursors */
3460                list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3461                        struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3462                        struct drm_framebuffer *fb = crtc->primary->fb;
3463                        struct amdgpu_bo *robj;
3464
3465                        if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3466                                struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3467                                r = amdgpu_bo_reserve(aobj, true);
3468                                if (r == 0) {
3469                                        amdgpu_bo_unpin(aobj);
3470                                        amdgpu_bo_unreserve(aobj);
3471                                }
3472                        }
3473
3474                        if (fb == NULL || fb->obj[0] == NULL) {
3475                                continue;
3476                        }
3477                        robj = gem_to_amdgpu_bo(fb->obj[0]);
3478                        /* don't unpin kernel fb objects */
3479                        if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3480                                r = amdgpu_bo_reserve(robj, true);
3481                                if (r == 0) {
3482                                        amdgpu_bo_unpin(robj);
3483                                        amdgpu_bo_unreserve(robj);
3484                                }
3485                        }
3486                }
3487        }
3488
3489        amdgpu_ras_suspend(adev);
3490
3491        r = amdgpu_device_ip_suspend_phase1(adev);
3492
3493        amdgpu_amdkfd_suspend(adev, !fbcon);
3494
3495        /* evict vram memory */
3496        amdgpu_bo_evict_vram(adev);
3497
3498        amdgpu_fence_driver_suspend(adev);
3499
3500        r = amdgpu_device_ip_suspend_phase2(adev);
3501
3502        /* evict remaining vram memory
3503         * This second call to evict vram is to evict the gart page table
3504         * using the CPU.
3505         */
3506        amdgpu_bo_evict_vram(adev);
3507
3508        return 0;
3509}
3510
3511/**
3512 * amdgpu_device_resume - initiate device resume
3513 *
3514 * @dev: drm dev pointer
3515 * @fbcon: notify the fbdev of resume
3516 *
3517 * Bring the hw back to operating state (all asics).
3518 * Returns 0 for success or an error on failure.
3519 * Called at driver resume.
3520 */
3521int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3522{
3523        struct drm_connector *connector;
3524        struct drm_connector_list_iter iter;
3525        struct amdgpu_device *adev = dev->dev_private;
3526        struct drm_crtc *crtc;
3527        int r = 0;
3528
3529        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3530                return 0;
3531
3532        /* post card */
3533        if (amdgpu_device_need_post(adev)) {
3534                r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3535                if (r)
3536                        DRM_ERROR("amdgpu asic init failed\n");
3537        }
3538
3539        r = amdgpu_device_ip_resume(adev);
3540        if (r) {
3541                DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
3542                return r;
3543        }
3544        amdgpu_fence_driver_resume(adev);
3545
3546
3547        r = amdgpu_device_ip_late_init(adev);
3548        if (r)
3549                return r;
3550
3551        queue_delayed_work(system_wq, &adev->delayed_init_work,
3552                           msecs_to_jiffies(AMDGPU_RESUME_MS));
3553
3554        if (!amdgpu_device_has_dc_support(adev)) {
3555                /* pin cursors */
3556                list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3557                        struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3558
3559                        if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3560                                struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3561                                r = amdgpu_bo_reserve(aobj, true);
3562                                if (r == 0) {
3563                                        r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3564                                        if (r != 0)
3565                                                DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3566                                        amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3567                                        amdgpu_bo_unreserve(aobj);
3568                                }
3569                        }
3570                }
3571        }
3572        r = amdgpu_amdkfd_resume(adev, !fbcon);
3573        if (r)
3574                return r;
3575
3576        /* Make sure IB tests flushed */
3577        flush_delayed_work(&adev->delayed_init_work);
3578
3579        /* blat the mode back in */
3580        if (fbcon) {
3581                if (!amdgpu_device_has_dc_support(adev)) {
3582                        /* pre DCE11 */
3583                        drm_helper_resume_force_mode(dev);
3584
3585                        /* turn on display hw */
3586                        drm_modeset_lock_all(dev);
3587
3588                        drm_connector_list_iter_begin(dev, &iter);
3589                        drm_for_each_connector_iter(connector, &iter)
3590                                drm_helper_connector_dpms(connector,
3591                                                          DRM_MODE_DPMS_ON);
3592                        drm_connector_list_iter_end(&iter);
3593
3594                        drm_modeset_unlock_all(dev);
3595                }
3596                amdgpu_fbdev_set_suspend(adev, 0);
3597        }
3598
3599        drm_kms_helper_poll_enable(dev);
3600
3601        amdgpu_ras_resume(adev);
3602
3603        /*
3604         * Most of the connector probing functions try to acquire runtime pm
3605         * refs to ensure that the GPU is powered on when connector polling is
3606         * performed. Since we're calling this from a runtime PM callback,
3607         * trying to acquire rpm refs will cause us to deadlock.
3608         *
3609         * Since we're guaranteed to be holding the rpm lock, it's safe to
3610         * temporarily disable the rpm helpers so this doesn't deadlock us.
3611         */
3612#ifdef CONFIG_PM
3613        dev->dev->power.disable_depth++;
3614#endif
3615        if (!amdgpu_device_has_dc_support(adev))
3616                drm_helper_hpd_irq_event(dev);
3617        else
3618                drm_kms_helper_hotplug_event(dev);
3619#ifdef CONFIG_PM
3620        dev->dev->power.disable_depth--;
3621#endif
3622        adev->in_suspend = false;
3623
3624        return 0;
3625}
3626
3627/**
3628 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3629 *
3630 * @adev: amdgpu_device pointer
3631 *
3632 * The list of all the hardware IPs that make up the asic is walked and
3633 * the check_soft_reset callbacks are run.  check_soft_reset determines
3634 * if the asic is still hung or not.
3635 * Returns true if any of the IPs are still in a hung state, false if not.
3636 */
3637static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3638{
3639        int i;
3640        bool asic_hang = false;
3641
3642        if (amdgpu_sriov_vf(adev))
3643                return true;
3644
3645        if (amdgpu_asic_need_full_reset(adev))
3646                return true;
3647
3648        for (i = 0; i < adev->num_ip_blocks; i++) {
3649                if (!adev->ip_blocks[i].status.valid)
3650                        continue;
3651                if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3652                        adev->ip_blocks[i].status.hang =
3653                                adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3654                if (adev->ip_blocks[i].status.hang) {
3655                        DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3656                        asic_hang = true;
3657                }
3658        }
3659        return asic_hang;
3660}
3661
3662/**
3663 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3664 *
3665 * @adev: amdgpu_device pointer
3666 *
3667 * The list of all the hardware IPs that make up the asic is walked and the
3668 * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3669 * handles any IP specific hardware or software state changes that are
3670 * necessary for a soft reset to succeed.
3671 * Returns 0 on success, negative error code on failure.
3672 */
3673static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3674{
3675        int i, r = 0;
3676
3677        for (i = 0; i < adev->num_ip_blocks; i++) {
3678                if (!adev->ip_blocks[i].status.valid)
3679                        continue;
3680                if (adev->ip_blocks[i].status.hang &&
3681                    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3682                        r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3683                        if (r)
3684                                return r;
3685                }
3686        }
3687
3688        return 0;
3689}
3690
3691/**
3692 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3693 *
3694 * @adev: amdgpu_device pointer
3695 *
3696 * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3697 * reset is necessary to recover.
3698 * Returns true if a full asic reset is required, false if not.
3699 */
3700static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3701{
3702        int i;
3703
3704        if (amdgpu_asic_need_full_reset(adev))
3705                return true;
3706
3707        for (i = 0; i < adev->num_ip_blocks; i++) {
3708                if (!adev->ip_blocks[i].status.valid)
3709                        continue;
3710                if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3711                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3712                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3713                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3714                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3715                        if (adev->ip_blocks[i].status.hang) {
3716                                DRM_INFO("Some block need full reset!\n");
3717                                return true;
3718                        }
3719                }
3720        }
3721        return false;
3722}
3723
3724/**
3725 * amdgpu_device_ip_soft_reset - do a soft reset
3726 *
3727 * @adev: amdgpu_device pointer
3728 *
3729 * The list of all the hardware IPs that make up the asic is walked and the
3730 * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3731 * IP specific hardware or software state changes that are necessary to soft
3732 * reset the IP.
3733 * Returns 0 on success, negative error code on failure.
3734 */
3735static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3736{
3737        int i, r = 0;
3738
3739        for (i = 0; i < adev->num_ip_blocks; i++) {
3740                if (!adev->ip_blocks[i].status.valid)
3741                        continue;
3742                if (adev->ip_blocks[i].status.hang &&
3743                    adev->ip_blocks[i].version->funcs->soft_reset) {
3744                        r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3745                        if (r)
3746                                return r;
3747                }
3748        }
3749
3750        return 0;
3751}
3752
3753/**
3754 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3755 *
3756 * @adev: amdgpu_device pointer
3757 *
3758 * The list of all the hardware IPs that make up the asic is walked and the
3759 * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3760 * handles any IP specific hardware or software state changes that are
3761 * necessary after the IP has been soft reset.
3762 * Returns 0 on success, negative error code on failure.
3763 */
3764static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3765{
3766        int i, r = 0;
3767
3768        for (i = 0; i < adev->num_ip_blocks; i++) {
3769                if (!adev->ip_blocks[i].status.valid)
3770                        continue;
3771                if (adev->ip_blocks[i].status.hang &&
3772                    adev->ip_blocks[i].version->funcs->post_soft_reset)
3773                        r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3774                if (r)
3775                        return r;
3776        }
3777
3778        return 0;
3779}
3780
3781/**
3782 * amdgpu_device_recover_vram - Recover some VRAM contents
3783 *
3784 * @adev: amdgpu_device pointer
3785 *
3786 * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3787 * restore things like GPUVM page tables after a GPU reset where
3788 * the contents of VRAM might be lost.
3789 *
3790 * Returns:
3791 * 0 on success, negative error code on failure.
3792 */
3793static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3794{
3795        struct dma_fence *fence = NULL, *next = NULL;
3796        struct amdgpu_bo *shadow;
3797        long r = 1, tmo;
3798
3799        if (amdgpu_sriov_runtime(adev))
3800                tmo = msecs_to_jiffies(8000);
3801        else
3802                tmo = msecs_to_jiffies(100);
3803
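        /*
         * Each amdgpu_bo_restore_shadow() call schedules a copy of the GTT
         * shadow back into VRAM and returns a fence; the wait on the previous
         * fence is overlapped with scheduling the next copy, and the loop
         * bails out if any wait fails or times out.
         */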
3804        DRM_INFO("recover vram bo from shadow start\n");
3805        mutex_lock(&adev->shadow_list_lock);
3806        list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3807
3808                /* No need to recover an evicted BO */
3809                if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3810                    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3811                    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3812                        continue;
3813
3814                r = amdgpu_bo_restore_shadow(shadow, &next);
3815                if (r)
3816                        break;
3817
3818                if (fence) {
3819                        tmo = dma_fence_wait_timeout(fence, false, tmo);
3820                        dma_fence_put(fence);
3821                        fence = next;
3822                        if (tmo == 0) {
3823                                r = -ETIMEDOUT;
3824                                break;
3825                        } else if (tmo < 0) {
3826                                r = tmo;
3827                                break;
3828                        }
3829                } else {
3830                        fence = next;
3831                }
3832        }
3833        mutex_unlock(&adev->shadow_list_lock);
3834
3835        if (fence)
3836                tmo = dma_fence_wait_timeout(fence, false, tmo);
3837        dma_fence_put(fence);
3838
3839        if (r < 0 || tmo <= 0) {
3840                DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3841                return -EIO;
3842        }
3843
3844        DRM_INFO("recover vram bo from shadow done\n");
3845        return 0;
3846}
3847
3848
3849/**
3850 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3851 *
3852 * @adev: amdgpu device pointer
3853 * @from_hypervisor: request from hypervisor
3854 *
3855 * Do a VF FLR and reinitialize the ASIC.
3856 * Returns 0 on success, or a negative error code on failure.
3857 */
3858static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3859                                     bool from_hypervisor)
3860{
3861        int r;
3862
3863        if (from_hypervisor)
3864                r = amdgpu_virt_request_full_gpu(adev, true);
3865        else
3866                r = amdgpu_virt_reset_gpu(adev);
3867        if (r)
3868                return r;
3869
3870        amdgpu_amdkfd_pre_reset(adev);
3871
3872        /* Resume IP prior to SMC */
3873        r = amdgpu_device_ip_reinit_early_sriov(adev);
3874        if (r)
3875                goto error;
3876
3877        amdgpu_virt_init_data_exchange(adev);
3878        /* we need to recover the gart prior to resuming SMC/CP/SDMA */
3879        amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3880
3881        r = amdgpu_device_fw_loading(adev);
3882        if (r)
3883                return r;
3884
3885        /* now we are okay to resume SMC/CP/SDMA */
3886        r = amdgpu_device_ip_reinit_late_sriov(adev);
3887        if (r)
3888                goto error;
3889
3890        amdgpu_irq_gpu_reset_resume_helper(adev);
3891        r = amdgpu_ib_ring_tests(adev);
3892        amdgpu_amdkfd_post_reset(adev);
3893
3894error:
3895        amdgpu_virt_release_full_gpu(adev, true);
3896        if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3897                amdgpu_inc_vram_lost(adev);
3898                r = amdgpu_device_recover_vram(adev);
3899        }
3900
3901        return r;
3902}
3903
3904/**
3905 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3906 *
3907 * @adev: amdgpu device pointer
3908 *
3909 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3910 * a hung GPU.
3911 */
3912bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3913{
3914        if (!amdgpu_device_ip_check_soft_reset(adev)) {
3915                DRM_INFO("Timeout, but no hardware hang detected.\n");
3916                return false;
3917        }
3918
3919        if (amdgpu_gpu_recovery == 0)
3920                goto disabled;
3921
3922        if (amdgpu_sriov_vf(adev))
3923                return true;
3924
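        /*
         * amdgpu_gpu_recovery == -1 means "auto": recovery is only enabled
         * for the ASICs listed below; 1 forces it on, 0 was handled above.
         */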
3925        if (amdgpu_gpu_recovery == -1) {
3926                switch (adev->asic_type) {
3927                case CHIP_BONAIRE:
3928                case CHIP_HAWAII:
3929                case CHIP_TOPAZ:
3930                case CHIP_TONGA:
3931                case CHIP_FIJI:
3932                case CHIP_POLARIS10:
3933                case CHIP_POLARIS11:
3934                case CHIP_POLARIS12:
3935                case CHIP_VEGAM:
3936                case CHIP_VEGA20:
3937                case CHIP_VEGA10:
3938                case CHIP_VEGA12:
3939                case CHIP_RAVEN:
3940                case CHIP_ARCTURUS:
3941                case CHIP_RENOIR:
3942                case CHIP_NAVI10:
3943                case CHIP_NAVI14:
3944                case CHIP_NAVI12:
3945                case CHIP_SIENNA_CICHLID:
3946                        break;
3947                default:
3948                        goto disabled;
3949                }
3950        }
3951
3952        return true;
3953
3954disabled:
3955                DRM_INFO("GPU recovery disabled.\n");
3956                return false;
3957}
3958
3959
3960static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3961                                        struct amdgpu_job *job,
3962                                        bool *need_full_reset_arg)
3963{
3964        int i, r = 0;
3965        bool need_full_reset  = *need_full_reset_arg;
3966
3967        amdgpu_debugfs_wait_dump(adev);
3968
3969        /* block all schedulers and reset given job's ring */
3970        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3971                struct amdgpu_ring *ring = adev->rings[i];
3972
3973                if (!ring || !ring->sched.thread)
3974                        continue;
3975
3976                /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3977                amdgpu_fence_driver_force_completion(ring);
3978        }
3979
3980        if (job)
3981                drm_sched_increase_karma(&job->base);
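        /*
         * Bumping the job's karma lets the scheduler mark the owning context
         * as guilty once the hang limit is exceeded, so its jobs are not
         * simply rerun after the reset (see drm_sched_increase_karma() and
         * drm_sched_resubmit_jobs()).
         */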
3982
3983        /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3984        if (!amdgpu_sriov_vf(adev)) {
3985
3986                if (!need_full_reset)
3987                        need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3988
3989                if (!need_full_reset) {
3990                        amdgpu_device_ip_pre_soft_reset(adev);
3991                        r = amdgpu_device_ip_soft_reset(adev);
3992                        amdgpu_device_ip_post_soft_reset(adev);
3993                        if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3994                                DRM_INFO("soft reset failed, will fallback to full reset!\n");
3995                                need_full_reset = true;
3996                        }
3997                }
3998
3999                if (need_full_reset)
4000                        r = amdgpu_device_ip_suspend(adev);
4001
4002                *need_full_reset_arg = need_full_reset;
4003        }
4004
4005        return r;
4006}
4007
4008static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
4009                               struct list_head *device_list_handle,
4010                               bool *need_full_reset_arg)
4011{
4012        struct amdgpu_device *tmp_adev = NULL;
4013        bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4014        int r = 0;
4015
4016        /*
4017         * ASIC reset has to be done on all XGMI hive nodes ASAP
4018         * to allow proper link negotiation in FW (within 1 sec)
4019         */
4020        if (need_full_reset) {
4021                list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4022                        /* For XGMI run all resets in parallel to speed up the process */
4023                        if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4024                                if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4025                                        r = -EALREADY;
4026                        } else
4027                                r = amdgpu_asic_reset(tmp_adev);
4028
4029                        if (r) {
4030                                DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4031                                         r, tmp_adev->ddev->unique);
4032                                break;
4033                        }
4034                }
4035
4036                /* For XGMI wait for all resets to complete before proceeding */
4037                if (!r) {
4038                        list_for_each_entry(tmp_adev, device_list_handle,
4039                                            gmc.xgmi.head) {
4040                                if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4041                                        flush_work(&tmp_adev->xgmi_reset_work);
4042                                        r = tmp_adev->asic_reset_res;
4043                                        if (r)
4044                                                break;
4045                                }
4046                        }
4047                }
4048        }
4049
4050        if (!r && amdgpu_ras_intr_triggered()) {
4051                list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4052                        if (tmp_adev->mmhub.funcs &&
4053                            tmp_adev->mmhub.funcs->reset_ras_error_count)
4054                                tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4055                }
4056
4057                amdgpu_ras_intr_cleared();
4058        }
4059
4060        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4061                if (need_full_reset) {
4062                        /* post card */
4063                        if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4064                                DRM_WARN("asic atom init failed!");
4065
4066                        if (!r) {
4067                                dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4068                                r = amdgpu_device_ip_resume_phase1(tmp_adev);
4069                                if (r)
4070                                        goto out;
4071
4072                                vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4073                                if (vram_lost) {
4074                                        DRM_INFO("VRAM is lost due to GPU reset!\n");
4075                                        amdgpu_inc_vram_lost(tmp_adev);
4076                                }
4077
4078                                r = amdgpu_gtt_mgr_recover(
4079                                        &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4080                                if (r)
4081                                        goto out;
4082
4083                                r = amdgpu_device_fw_loading(tmp_adev);
4084                                if (r)
4085                                        return r;
4086
4087                                r = amdgpu_device_ip_resume_phase2(tmp_adev);
4088                                if (r)
4089                                        goto out;
4090
4091                                if (vram_lost)
4092                                        amdgpu_device_fill_reset_magic(tmp_adev);
4093
4094                                /*
4095                                 * Add this ASIC back as tracked since the reset
4096                                 * completed successfully.
4097                                 */
4098                                amdgpu_register_gpu_instance(tmp_adev);
4099
4100                                r = amdgpu_device_ip_late_init(tmp_adev);
4101                                if (r)
4102                                        goto out;
4103
4104                                amdgpu_fbdev_set_suspend(tmp_adev, 0);
4105
4106                                /* must succeed. */
4107                                amdgpu_ras_resume(tmp_adev);
4108
4109                                /* Update PSP FW topology after reset */
4110                                if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4111                                        r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4112                        }
4113                }
4114
4115
4116out:
4117                if (!r) {
4118                        amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4119                        r = amdgpu_ib_ring_tests(tmp_adev);
4120                        if (r) {
4121                                dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4122                                r = amdgpu_device_ip_suspend(tmp_adev);
4123                                need_full_reset = true;
4124                                r = -EAGAIN;
4125                                goto end;
4126                        }
4127                }
4128
4129                if (!r)
4130                        r = amdgpu_device_recover_vram(tmp_adev);
4131                else
4132                        tmp_adev->asic_reset_res = r;
4133        }
4134
4135end:
4136        *need_full_reset_arg = need_full_reset;
4137        return r;
4138}
4139
4140static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
4141{
4142        if (trylock) {
4143                if (!mutex_trylock(&adev->lock_reset))
4144                        return false;
4145        } else
4146                mutex_lock(&adev->lock_reset);
4147
4148        atomic_inc(&adev->gpu_reset_counter);
4149        adev->in_gpu_reset = true;
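        /*
         * mp1_state tells the power management code which reset flow is in
         * progress so the SMU/MP1 firmware can be handled accordingly (the
         * set_mp1_state handlers consume this).
         */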
4150        switch (amdgpu_asic_reset_method(adev)) {
4151        case AMD_RESET_METHOD_MODE1:
4152                adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4153                break;
4154        case AMD_RESET_METHOD_MODE2:
4155                adev->mp1_state = PP_MP1_STATE_RESET;
4156                break;
4157        default:
4158                adev->mp1_state = PP_MP1_STATE_NONE;
4159                break;
4160        }
4161
4162        return true;
4163}
4164
4165static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4166{
4167        amdgpu_vf_error_trans_all(adev);
4168        adev->mp1_state = PP_MP1_STATE_NONE;
4169        adev->in_gpu_reset = false;
4170        mutex_unlock(&adev->lock_reset);
4171}
4172
4173static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4174{
4175        struct pci_dev *p = NULL;
4176
4177        p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4178                        adev->pdev->bus->number, 1);
4179        if (p) {
4180                pm_runtime_enable(&(p->dev));
4181                pm_runtime_resume(&(p->dev));
4182        }
4183}
4184
4185static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4186{
4187        enum amd_reset_method reset_method;
4188        struct pci_dev *p = NULL;
4189        u64 expires;
4190
4191        /*
4192         * For now, only BACO and mode1 reset are confirmed to suffer
4193         * from the audio issue without a proper audio suspend.
4194         */
4195        reset_method = amdgpu_asic_reset_method(adev);
4196        if ((reset_method != AMD_RESET_METHOD_BACO) &&
4197             (reset_method != AMD_RESET_METHOD_MODE1))
4198                return -EINVAL;
4199
4200        p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4201                        adev->pdev->bus->number, 1);
4202        if (!p)
4203                return -ENODEV;
4204
4205        expires = pm_runtime_autosuspend_expiration(&(p->dev));
4206        if (!expires)
4207                /*
4208                 * If we cannot get the audio device autosuspend delay,
4209                 * a fixed 4s interval is used. Since 3s is the audio
4210                 * controller's default autosuspend delay, the 4s used
4211                 * here is guaranteed to cover it.
4212                 */
4213                expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
4214
4215        while (!pm_runtime_status_suspended(&(p->dev))) {
4216                if (!pm_runtime_suspend(&(p->dev)))
4217                        break;
4218
4219                if (expires < ktime_get_mono_fast_ns()) {
4220                        dev_warn(adev->dev, "failed to suspend display audio\n");
4221                        /* TODO: abort the succeeding gpu reset? */
4222                        return -ETIMEDOUT;
4223                }
4224        }
4225
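        /*
         * Keep runtime PM disabled for the audio function so it cannot
         * autoresume while the GPU reset is in progress; it is re-enabled
         * by amdgpu_device_resume_display_audio() afterwards.
         */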
4226        pm_runtime_disable(&(p->dev));
4227
4228        return 0;
4229}
4230
4231/**
4232 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4233 *
4234 * @adev: amdgpu device pointer
4235 * @job: the job that triggered the hang
4236 *
4237 * Attempt to reset the GPU if it has hung (all asics).
4238 * Attempt a soft reset or a full reset and reinitialize the ASIC.
4239 * Returns 0 for success or an error on failure.
4240 */
4241
4242int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4243                              struct amdgpu_job *job)
4244{
4245        struct list_head device_list, *device_list_handle =  NULL;
4246        bool need_full_reset = false;
4247        bool job_signaled = false;
4248        struct amdgpu_hive_info *hive = NULL;
4249        struct amdgpu_device *tmp_adev = NULL;
4250        int i, r = 0;
4251        bool need_emergency_restart = false;
4252        bool audio_suspended = false;
4253
4254        /**
4255         * Special case: RAS triggered and full reset isn't supported
4256         */
4257        need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4258
4259        /*
4260         * Flush RAM to disk so that after reboot
4261         * the user can read the log and see why the system rebooted.
4262         */
4263        if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
4264                DRM_WARN("Emergency reboot.");
4265
4266                ksys_sync_helper();
4267                emergency_restart();
4268        }
4269
4270        dev_info(adev->dev, "GPU %s begin!\n",
4271                need_emergency_restart ? "jobs stop":"reset");
4272
4273        /*
4274         * Here we trylock to avoid a chain of resets executing while this
4275         * timeout handler runs, triggered either by jobs on different adevs
4276         * in an XGMI hive or by jobs on different schedulers of the same
4277         * device. We always reset all schedulers for a device and all
4278         * devices in an XGMI hive, so that should take care of them too.
4279         */
4280        hive = amdgpu_get_xgmi_hive(adev, true);
4281        if (hive && !mutex_trylock(&hive->reset_lock)) {
4282                DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4283                          job ? job->base.id : -1, hive->hive_id);
4284                mutex_unlock(&hive->hive_lock);
4285                return 0;
4286        }
4287
4288        /*
4289         * Build list of devices to reset.
4290         * In case we are in XGMI hive mode, resort the device list
4291         * to put adev in the 1st position.
4292         */
4293        INIT_LIST_HEAD(&device_list);
4294        if (adev->gmc.xgmi.num_physical_nodes > 1) {
4295                if (!hive)
4296                        return -ENODEV;
4297                if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4298                        list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
4299                device_list_handle = &hive->device_list;
4300        } else {
4301                list_add_tail(&adev->gmc.xgmi.head, &device_list);
4302                device_list_handle = &device_list;
4303        }
4304
4305        /* block all schedulers and reset given job's ring */
4306        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4307                if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4308                        DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4309                                  job ? job->base.id : -1);
4310                        mutex_unlock(&hive->hive_lock);
4311                        return 0;
4312                }
4313
4314                /*
4315                 * Try to put the audio codec into suspend state
4316                 * before the gpu reset starts.
4317                 *
4318                 * The power domain of the graphics device is shared
4319                 * with the AZ (audio) power domain. Without this,
4320                 * we may change the audio hardware behind the audio
4321                 * driver's back, which would trigger audio codec
4322                 * errors.
4323                 */
4324                if (!amdgpu_device_suspend_display_audio(tmp_adev))
4325                        audio_suspended = true;
4326
4327                amdgpu_ras_set_error_query_ready(tmp_adev, false);
4328
4329                cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4330
4331                if (!amdgpu_sriov_vf(tmp_adev))
4332                        amdgpu_amdkfd_pre_reset(tmp_adev);
4333
4334                /*
4335                 * Mark the ASICs to be reset as untracked first,
4336                 * and add them back after the reset has completed.
4337                 */
4338                amdgpu_unregister_gpu_instance(tmp_adev);
4339
4340                amdgpu_fbdev_set_suspend(tmp_adev, 1);
4341
4342                /* disable ras on ALL IPs */
4343                if (!need_emergency_restart &&
4344                      amdgpu_device_ip_need_full_reset(tmp_adev))
4345                        amdgpu_ras_suspend(tmp_adev);
4346
4347                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4348                        struct amdgpu_ring *ring = tmp_adev->rings[i];
4349
4350                        if (!ring || !ring->sched.thread)
4351                                continue;
4352
4353                        drm_sched_stop(&ring->sched, job ? &job->base : NULL);
4354
4355                        if (need_emergency_restart)
4356                                amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4357                }
4358        }
4359
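        /*
         * For an emergency restart, skip the ASIC reset and scheduler
         * resume and go straight to the per-device cleanup below.
         */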
4360        if (need_emergency_restart)
4361                goto skip_sched_resume;
4362
4363        /*
4364         * We must check here whether the guilty job has already signaled,
4365         * since after this point all old HW fences are force signaled.
4366         *
4367         * job->base holds a reference to the parent fence.
4368         */
4369        if (job && job->base.s_fence->parent &&
4370            dma_fence_is_signaled(job->base.s_fence->parent)) {
4371                job_signaled = true;
4372                dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4373                goto skip_hw_reset;
4374        }
4375
4376retry:  /* Pre-ASIC reset for the rest of the adevs in the XGMI hive. */
4377        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4378                r = amdgpu_device_pre_asic_reset(tmp_adev,
4379                                                 NULL,
4380                                                 &need_full_reset);
4381                /* TODO: Should we stop? */
4382                if (r) {
4383                        DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4384                                  r, tmp_adev->ddev->unique);
4385                        tmp_adev->asic_reset_res = r;
4386                }
4387        }
4388
4389        /* Actual ASIC resets, if needed. */
4390        /* TODO Implement XGMI hive reset logic for SRIOV */
4391        if (amdgpu_sriov_vf(adev)) {
4392                r = amdgpu_device_reset_sriov(adev, job ? false : true);
4393                if (r)
4394                        adev->asic_reset_res = r;
4395        } else {
4396                r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
4397                if (r && r == -EAGAIN)
4398                        goto retry;
4399        }
4400
4401skip_hw_reset:
4402
4403        /* Post-ASIC reset for all devs. */
4404        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4405
4406                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4407                        struct amdgpu_ring *ring = tmp_adev->rings[i];
4408
4409                        if (!ring || !ring->sched.thread)
4410                                continue;
4411
4412                        /* No point in resubmitting jobs if we didn't do a HW reset */
4413                        if (!tmp_adev->asic_reset_res && !job_signaled)
4414                                drm_sched_resubmit_jobs(&ring->sched);
4415
4416                        drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4417                }
4418
4419                if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4420                        drm_helper_resume_force_mode(tmp_adev->ddev);
4421                }
4422
4423                tmp_adev->asic_reset_res = 0;
4424
4425                if (r) {
4426                        /* bad news, how do we tell it to userspace? */
4427                        dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
4428                        amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4429                } else {
4430                        dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
4431                }
4432        }
4433
4434skip_sched_resume:
4435        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4436                /* unlock kfd: SRIOV would do it separately */
4437                if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
4438                        amdgpu_amdkfd_post_reset(tmp_adev);
4439                if (audio_suspended)
4440                        amdgpu_device_resume_display_audio(tmp_adev);
4441                amdgpu_device_unlock_adev(tmp_adev);
4442        }
4443
4444        if (hive) {
4445                mutex_unlock(&hive->reset_lock);
4446                mutex_unlock(&hive->hive_lock);
4447        }
4448
4449        if (r)
4450                dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
4451        return r;
4452}
4453
4454/**
4455 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
4456 *
4457 * @adev: amdgpu_device pointer
4458 *
4459 * Fetches and stores in the driver the PCIe capabilities (gen speed
4460 * and lanes) of the slot the device is in. Handles APUs and
4461 * virtualized environments where PCIe config space may not be available.
4462 */
4463static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
4464{
4465        struct pci_dev *pdev;
4466        enum pci_bus_speed speed_cap, platform_speed_cap;
4467        enum pcie_link_width platform_link_width;
4468
4469        if (amdgpu_pcie_gen_cap)
4470                adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
4471
4472        if (amdgpu_pcie_lane_cap)
4473                adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
4474
4475        /* covers APUs as well */
4476        if (pci_is_root_bus(adev->pdev->bus)) {
4477                if (adev->pm.pcie_gen_mask == 0)
4478                        adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4479                if (adev->pm.pcie_mlw_mask == 0)
4480                        adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
4481                return;
4482        }
4483
4484        if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4485                return;
4486
4487        pcie_bandwidth_available(adev->pdev, NULL,
4488                                 &platform_speed_cap, &platform_link_width);
4489
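        /*
         * The masks built below are cumulative: support for a given PCIe
         * gen implies support for all lower gens as well.
         */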
4490        if (adev->pm.pcie_gen_mask == 0) {
4491                /* asic caps */
4492                pdev = adev->pdev;
4493                speed_cap = pcie_get_speed_cap(pdev);
4494                if (speed_cap == PCI_SPEED_UNKNOWN) {
4495                        adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4496                                                  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4497                                                  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4498                } else {
4499                        if (speed_cap == PCIE_SPEED_16_0GT)
4500                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4501                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4502                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4503                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4504                        else if (speed_cap == PCIE_SPEED_8_0GT)
4505                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4506                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4507                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4508                        else if (speed_cap == PCIE_SPEED_5_0GT)
4509                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4510                                                          CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4511                        else
4512                                adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4513                }
4514                /* platform caps */
4515                if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4516                        adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4517                                                   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4518                } else {
4519                        if (platform_speed_cap == PCIE_SPEED_16_0GT)
4520                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4521                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4522                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4523                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4524                        else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4525                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4526                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4527                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4528                        else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4529                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4530                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4531                        else
4532                                adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4533
4534                }
4535        }
4536        if (adev->pm.pcie_mlw_mask == 0) {
4537                if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4538                        adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4539                } else {
4540                        switch (platform_link_width) {
4541                        case PCIE_LNK_X32:
4542                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4543                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4544                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4545                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4546                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4547                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4548                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4549                                break;
4550                        case PCIE_LNK_X16:
4551                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4552                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4553                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4554                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4555                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4556                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4557                                break;
4558                        case PCIE_LNK_X12:
4559                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4560                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4561                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4562                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4563                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4564                                break;
4565                        case PCIE_LNK_X8:
4566                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4567                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4568                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4569                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4570                                break;
4571                        case PCIE_LNK_X4:
4572                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4573                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4574                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4575                                break;
4576                        case PCIE_LNK_X2:
4577                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4578                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4579                                break;
4580                        case PCIE_LNK_X1:
4581                                adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4582                                break;
4583                        default:
4584                                break;
4585                        }
4586                }
4587        }
4588}
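
/*
 * Illustrative sketch, not part of the driver proper: the gen mask built
 * above is cumulative, so a consumer can derive the highest supported
 * link speed by testing the highest CAIL bit first. The helper name
 * below is hypothetical.
 */
static unsigned int example_max_platform_pcie_gen(struct amdgpu_device *adev)
{
        if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
                return 4;
        if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
                return 3;
        if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
                return 2;
        return 1;
}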
4589
4590int amdgpu_device_baco_enter(struct drm_device *dev)
4591{
4592        struct amdgpu_device *adev = dev->dev_private;
4593        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4594
4595        if (!amdgpu_device_supports_baco(adev->ddev))
4596                return -ENOTSUPP;
4597
4598        if (ras && ras->supported)
4599                adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4600
4601        return amdgpu_dpm_baco_enter(adev);
4602}
4603
4604int amdgpu_device_baco_exit(struct drm_device *dev)
4605{
4606        struct amdgpu_device *adev = dev->dev_private;
4607        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4608        int ret = 0;
4609
4610        if (!amdgpu_device_supports_baco(adev->ddev))
4611                return -ENOTSUPP;
4612
4613        ret = amdgpu_dpm_baco_exit(adev);
4614        if (ret)
4615                return ret;
4616
4617        if (ras && ras->supported)
4618                adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4619
4620        return 0;
4621}
4622
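/*
 * Illustrative sketch, assuming a runtime-PM style caller: a BACO cycle
 * is bracketed by the two helpers above. The function name below is
 * hypothetical and not part of the driver.
 */
static int example_baco_cycle(struct drm_device *dev)
{
        int r;

        r = amdgpu_device_baco_enter(dev);
        if (r)  /* e.g. -ENOTSUPP when BACO is not supported on this ASIC */
                return r;

        /* ... the device stays in BACO until it is brought back out ... */

        return amdgpu_device_baco_exit(dev);
}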