linux/drivers/gpu/drm/radeon/cik.c
<<
>>
Prefs
   1/*
   2 * Copyright 2012 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Alex Deucher
  23 */
  24
  25#include <linux/firmware.h>
  26#include <linux/module.h>
  27#include <linux/pci.h>
  28#include <linux/slab.h>
  29
  30#include <drm/drm_vblank.h>
  31
  32#include "atom.h"
  33#include "evergreen.h"
  34#include "cik_blit_shaders.h"
  35#include "cik.h"
  36#include "cikd.h"
  37#include "clearstate_ci.h"
  38#include "r600.h"
  39#include "radeon.h"
  40#include "radeon_asic.h"
  41#include "radeon_audio.h"
  42#include "radeon_ucode.h"
  43#include "si.h"
  44#include "vce.h"
  45
  /*
   * Default SH_MEM_CONFIG value (per the macro name, the GFX default):
   * only the alignment-mode field is set, selecting the "unaligned" mode.
   */
  46#define SH_MEM_CONFIG_GFX_DEFAULT \
  47        ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
  48
  /*
   * Firmware image names declared with MODULE_FIRMWARE().
   *
   * Each ASIC family (Bonaire, Hawaii, Kaveri, Kabini, Mullins) is listed
   * twice, once with upper-case and once with lower-case file names.  The
   * two sets are not identical: only the upper-case Bonaire/Hawaii sets
   * list an mc2 image, only the lower-case Bonaire/Hawaii sets list a
   * k_smc image, and only the lower-case Kaveri set lists mec2.
   * NOTE(review): presumably the two spellings are an older and a newer
   * firmware naming scheme - confirm against the firmware loading code.
   */
  49MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
  50MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
  51MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
  52MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
  53MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
  54MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
  55MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
  56MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
  57MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
  58
  59MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
  60MODULE_FIRMWARE("radeon/bonaire_me.bin");
  61MODULE_FIRMWARE("radeon/bonaire_ce.bin");
  62MODULE_FIRMWARE("radeon/bonaire_mec.bin");
  63MODULE_FIRMWARE("radeon/bonaire_mc.bin");
  64MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
  65MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
  66MODULE_FIRMWARE("radeon/bonaire_smc.bin");
  67MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
  68
  69MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
  70MODULE_FIRMWARE("radeon/HAWAII_me.bin");
  71MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
  72MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
  73MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
  74MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
  75MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
  76MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
  77MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
  78
  79MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
  80MODULE_FIRMWARE("radeon/hawaii_me.bin");
  81MODULE_FIRMWARE("radeon/hawaii_ce.bin");
  82MODULE_FIRMWARE("radeon/hawaii_mec.bin");
  83MODULE_FIRMWARE("radeon/hawaii_mc.bin");
  84MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
  85MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
  86MODULE_FIRMWARE("radeon/hawaii_smc.bin");
  87MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
  88
  89MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
  90MODULE_FIRMWARE("radeon/KAVERI_me.bin");
  91MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
  92MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
  93MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
  94MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
  95
  96MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
  97MODULE_FIRMWARE("radeon/kaveri_me.bin");
  98MODULE_FIRMWARE("radeon/kaveri_ce.bin");
  99MODULE_FIRMWARE("radeon/kaveri_mec.bin");
 100MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
 101MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
 102MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
 103
 104MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
 105MODULE_FIRMWARE("radeon/KABINI_me.bin");
 106MODULE_FIRMWARE("radeon/KABINI_ce.bin");
 107MODULE_FIRMWARE("radeon/KABINI_mec.bin");
 108MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
 109MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
 110
 111MODULE_FIRMWARE("radeon/kabini_pfp.bin");
 112MODULE_FIRMWARE("radeon/kabini_me.bin");
 113MODULE_FIRMWARE("radeon/kabini_ce.bin");
 114MODULE_FIRMWARE("radeon/kabini_mec.bin");
 115MODULE_FIRMWARE("radeon/kabini_rlc.bin");
 116MODULE_FIRMWARE("radeon/kabini_sdma.bin");
 117
 118MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
 119MODULE_FIRMWARE("radeon/MULLINS_me.bin");
 120MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
 121MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
 122MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
 123MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
 124
 125MODULE_FIRMWARE("radeon/mullins_pfp.bin");
 126MODULE_FIRMWARE("radeon/mullins_me.bin");
 127MODULE_FIRMWARE("radeon/mullins_ce.bin");
 128MODULE_FIRMWARE("radeon/mullins_mec.bin");
 129MODULE_FIRMWARE("radeon/mullins_rlc.bin");
 130MODULE_FIRMWARE("radeon/mullins_sdma.bin");
 131
 /*
  * Forward declarations of static helpers, so they can be called before
  * their definitions appear.
  */
 132static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 133static void cik_rlc_stop(struct radeon_device *rdev);
 134static void cik_pcie_gen3_enable(struct radeon_device *rdev);
 135static void cik_program_aspm(struct radeon_device *rdev);
 136static void cik_init_pg(struct radeon_device *rdev);
 137static void cik_init_cg(struct radeon_device *rdev);
 138static void cik_fini_pg(struct radeon_device *rdev);
 139static void cik_fini_cg(struct radeon_device *rdev);
 140static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
 141                                          bool enable);
 142
 143/**
 144 * cik_get_allowed_info_register - fetch the register for the info ioctl
 145 *
 146 * @rdev: radeon_device pointer
 147 * @reg: register offset in bytes
 148 * @val: register value
 149 *
 150 * Returns 0 for success or -EINVAL for an invalid register
 151 *
 152 */
 153int cik_get_allowed_info_register(struct radeon_device *rdev,
 154                                  u32 reg, u32 *val)
 155{
 156        switch (reg) {
 157        case GRBM_STATUS:
 158        case GRBM_STATUS2:
 159        case GRBM_STATUS_SE0:
 160        case GRBM_STATUS_SE1:
 161        case GRBM_STATUS_SE2:
 162        case GRBM_STATUS_SE3:
 163        case SRBM_STATUS:
 164        case SRBM_STATUS2:
 165        case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
 166        case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
 167        case UVD_STATUS:
 168        /* TODO VCE */
 169                *val = RREG32(reg);
 170                return 0;
 171        default:
 172                return -EINVAL;
 173        }
 174}
 175
 176/*
 177 * Indirect registers accessor
 178 */
 179u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
 180{
 181        unsigned long flags;
 182        u32 r;
 183
 184        spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 185        WREG32(CIK_DIDT_IND_INDEX, (reg));
 186        r = RREG32(CIK_DIDT_IND_DATA);
 187        spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 188        return r;
 189}
 190
 191void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 192{
 193        unsigned long flags;
 194
 195        spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 196        WREG32(CIK_DIDT_IND_INDEX, (reg));
 197        WREG32(CIK_DIDT_IND_DATA, (v));
 198        spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 199}
 200
 201/* get temperature in millidegrees */
 202int ci_get_temp(struct radeon_device *rdev)
 203{
 204        u32 temp;
 205        int actual_temp = 0;
 206
 207        temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
 208                CTF_TEMP_SHIFT;
 209
 210        if (temp & 0x200)
 211                actual_temp = 255;
 212        else
 213                actual_temp = temp & 0x1ff;
 214
 215        return actual_temp * 1000;
 216}
 217
 218/* get temperature in millidegrees */
 219int kv_get_temp(struct radeon_device *rdev)
 220{
 221        u32 temp;
 222        int actual_temp = 0;
 223
 224        temp = RREG32_SMC(0xC0300E0C);
 225
 226        if (temp)
 227                actual_temp = (temp / 8) - 49;
 228        else
 229                actual_temp = 0;
 230
 231        return actual_temp * 1000;
 232}
 233
 234/*
 235 * Indirect registers accessor
 236 */
 237u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
 238{
 239        unsigned long flags;
 240        u32 r;
 241
 242        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 243        WREG32(PCIE_INDEX, reg);
 244        (void)RREG32(PCIE_INDEX);
 245        r = RREG32(PCIE_DATA);
 246        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 247        return r;
 248}
 249
 250void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 251{
 252        unsigned long flags;
 253
 254        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 255        WREG32(PCIE_INDEX, reg);
 256        (void)RREG32(PCIE_INDEX);
 257        WREG32(PCIE_DATA, v);
 258        (void)RREG32(PCIE_DATA);
 259        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 260}
 261
 /*
  * RLC save/restore register list (the "spectre" variant, per the name).
  *
  * Layout as it appears in this table:
  *  - most entries are two words: a packed word of the form
  *    (selector << 16) | (offset >> 2), followed by a 0x00000000 word;
  *  - a bare 0x3 word follows the first run of such pairs;
  *  - a bare 0x5 word precedes the final five packed words, which have
  *    no trailing 0x00000000 word.
  *
  * NOTE(review): the meaning of the selector values in the upper 16 bits
  * (0x0e00, 0x4e00..0xbe00, 0x0600, 0x0400, 0x0000/0x0001), of the zero
  * words and of the 0x3 / 0x5 words is not visible here - confirm against
  * the code that consumes this table before editing it.  The ">> 2"
  * presumably converts a byte offset into a dword index - confirm.
  */
 262static const u32 spectre_rlc_save_restore_register_list[] =
 263{
 264        (0x0e00 << 16) | (0xc12c >> 2),
 265        0x00000000,
 266        (0x0e00 << 16) | (0xc140 >> 2),
 267        0x00000000,
 268        (0x0e00 << 16) | (0xc150 >> 2),
 269        0x00000000,
 270        (0x0e00 << 16) | (0xc15c >> 2),
 271        0x00000000,
 272        (0x0e00 << 16) | (0xc168 >> 2),
 273        0x00000000,
 274        (0x0e00 << 16) | (0xc170 >> 2),
 275        0x00000000,
 276        (0x0e00 << 16) | (0xc178 >> 2),
 277        0x00000000,
 278        (0x0e00 << 16) | (0xc204 >> 2),
 279        0x00000000,
 280        (0x0e00 << 16) | (0xc2b4 >> 2),
 281        0x00000000,
 282        (0x0e00 << 16) | (0xc2b8 >> 2),
 283        0x00000000,
 284        (0x0e00 << 16) | (0xc2bc >> 2),
 285        0x00000000,
 286        (0x0e00 << 16) | (0xc2c0 >> 2),
 287        0x00000000,
 288        (0x0e00 << 16) | (0x8228 >> 2),
 289        0x00000000,
 290        (0x0e00 << 16) | (0x829c >> 2),
 291        0x00000000,
 292        (0x0e00 << 16) | (0x869c >> 2),
 293        0x00000000,
 294        (0x0600 << 16) | (0x98f4 >> 2),
 295        0x00000000,
 296        (0x0e00 << 16) | (0x98f8 >> 2),
 297        0x00000000,
 298        (0x0e00 << 16) | (0x9900 >> 2),
 299        0x00000000,
 300        (0x0e00 << 16) | (0xc260 >> 2),
 301        0x00000000,
 302        (0x0e00 << 16) | (0x90e8 >> 2),
 303        0x00000000,
 304        (0x0e00 << 16) | (0x3c000 >> 2),
 305        0x00000000,
 306        (0x0e00 << 16) | (0x3c00c >> 2),
 307        0x00000000,
 308        (0x0e00 << 16) | (0x8c1c >> 2),
 309        0x00000000,
 310        (0x0e00 << 16) | (0x9700 >> 2),
 311        0x00000000,
 312        (0x0e00 << 16) | (0xcd20 >> 2),
 313        0x00000000,
 314        (0x4e00 << 16) | (0xcd20 >> 2),
 315        0x00000000,
 316        (0x5e00 << 16) | (0xcd20 >> 2),
 317        0x00000000,
 318        (0x6e00 << 16) | (0xcd20 >> 2),
 319        0x00000000,
 320        (0x7e00 << 16) | (0xcd20 >> 2),
 321        0x00000000,
 322        (0x8e00 << 16) | (0xcd20 >> 2),
 323        0x00000000,
 324        (0x9e00 << 16) | (0xcd20 >> 2),
 325        0x00000000,
 326        (0xae00 << 16) | (0xcd20 >> 2),
 327        0x00000000,
 328        (0xbe00 << 16) | (0xcd20 >> 2),
 329        0x00000000,
 330        (0x0e00 << 16) | (0x89bc >> 2),
 331        0x00000000,
 332        (0x0e00 << 16) | (0x8900 >> 2),
 333        0x00000000,
 334        0x3, /* NOTE(review): bare marker word, meaning not shown here - confirm */
 335        (0x0e00 << 16) | (0xc130 >> 2),
 336        0x00000000,
 337        (0x0e00 << 16) | (0xc134 >> 2),
 338        0x00000000,
 339        (0x0e00 << 16) | (0xc1fc >> 2),
 340        0x00000000,
 341        (0x0e00 << 16) | (0xc208 >> 2),
 342        0x00000000,
 343        (0x0e00 << 16) | (0xc264 >> 2),
 344        0x00000000,
 345        (0x0e00 << 16) | (0xc268 >> 2),
 346        0x00000000,
 347        (0x0e00 << 16) | (0xc26c >> 2),
 348        0x00000000,
 349        (0x0e00 << 16) | (0xc270 >> 2),
 350        0x00000000,
 351        (0x0e00 << 16) | (0xc274 >> 2),
 352        0x00000000,
 353        (0x0e00 << 16) | (0xc278 >> 2),
 354        0x00000000,
 355        (0x0e00 << 16) | (0xc27c >> 2),
 356        0x00000000,
 357        (0x0e00 << 16) | (0xc280 >> 2),
 358        0x00000000,
 359        (0x0e00 << 16) | (0xc284 >> 2),
 360        0x00000000,
 361        (0x0e00 << 16) | (0xc288 >> 2),
 362        0x00000000,
 363        (0x0e00 << 16) | (0xc28c >> 2),
 364        0x00000000,
 365        (0x0e00 << 16) | (0xc290 >> 2),
 366        0x00000000,
 367        (0x0e00 << 16) | (0xc294 >> 2),
 368        0x00000000,
 369        (0x0e00 << 16) | (0xc298 >> 2),
 370        0x00000000,
 371        (0x0e00 << 16) | (0xc29c >> 2),
 372        0x00000000,
 373        (0x0e00 << 16) | (0xc2a0 >> 2),
 374        0x00000000,
 375        (0x0e00 << 16) | (0xc2a4 >> 2),
 376        0x00000000,
 377        (0x0e00 << 16) | (0xc2a8 >> 2),
 378        0x00000000,
 379        (0x0e00 << 16) | (0xc2ac  >> 2),
 380        0x00000000,
 381        (0x0e00 << 16) | (0xc2b0 >> 2),
 382        0x00000000,
 383        (0x0e00 << 16) | (0x301d0 >> 2),
 384        0x00000000,
 385        (0x0e00 << 16) | (0x30238 >> 2),
 386        0x00000000,
 387        (0x0e00 << 16) | (0x30250 >> 2),
 388        0x00000000,
 389        (0x0e00 << 16) | (0x30254 >> 2),
 390        0x00000000,
 391        (0x0e00 << 16) | (0x30258 >> 2),
 392        0x00000000,
 393        (0x0e00 << 16) | (0x3025c >> 2),
 394        0x00000000,
 395        (0x4e00 << 16) | (0xc900 >> 2),
 396        0x00000000,
 397        (0x5e00 << 16) | (0xc900 >> 2),
 398        0x00000000,
 399        (0x6e00 << 16) | (0xc900 >> 2),
 400        0x00000000,
 401        (0x7e00 << 16) | (0xc900 >> 2),
 402        0x00000000,
 403        (0x8e00 << 16) | (0xc900 >> 2),
 404        0x00000000,
 405        (0x9e00 << 16) | (0xc900 >> 2),
 406        0x00000000,
 407        (0xae00 << 16) | (0xc900 >> 2),
 408        0x00000000,
 409        (0xbe00 << 16) | (0xc900 >> 2),
 410        0x00000000,
 411        (0x4e00 << 16) | (0xc904 >> 2),
 412        0x00000000,
 413        (0x5e00 << 16) | (0xc904 >> 2),
 414        0x00000000,
 415        (0x6e00 << 16) | (0xc904 >> 2),
 416        0x00000000,
 417        (0x7e00 << 16) | (0xc904 >> 2),
 418        0x00000000,
 419        (0x8e00 << 16) | (0xc904 >> 2),
 420        0x00000000,
 421        (0x9e00 << 16) | (0xc904 >> 2),
 422        0x00000000,
 423        (0xae00 << 16) | (0xc904 >> 2),
 424        0x00000000,
 425        (0xbe00 << 16) | (0xc904 >> 2),
 426        0x00000000,
 427        (0x4e00 << 16) | (0xc908 >> 2),
 428        0x00000000,
 429        (0x5e00 << 16) | (0xc908 >> 2),
 430        0x00000000,
 431        (0x6e00 << 16) | (0xc908 >> 2),
 432        0x00000000,
 433        (0x7e00 << 16) | (0xc908 >> 2),
 434        0x00000000,
 435        (0x8e00 << 16) | (0xc908 >> 2),
 436        0x00000000,
 437        (0x9e00 << 16) | (0xc908 >> 2),
 438        0x00000000,
 439        (0xae00 << 16) | (0xc908 >> 2),
 440        0x00000000,
 441        (0xbe00 << 16) | (0xc908 >> 2),
 442        0x00000000,
 443        (0x4e00 << 16) | (0xc90c >> 2),
 444        0x00000000,
 445        (0x5e00 << 16) | (0xc90c >> 2),
 446        0x00000000,
 447        (0x6e00 << 16) | (0xc90c >> 2),
 448        0x00000000,
 449        (0x7e00 << 16) | (0xc90c >> 2),
 450        0x00000000,
 451        (0x8e00 << 16) | (0xc90c >> 2),
 452        0x00000000,
 453        (0x9e00 << 16) | (0xc90c >> 2),
 454        0x00000000,
 455        (0xae00 << 16) | (0xc90c >> 2),
 456        0x00000000,
 457        (0xbe00 << 16) | (0xc90c >> 2),
 458        0x00000000,
 459        (0x4e00 << 16) | (0xc910 >> 2),
 460        0x00000000,
 461        (0x5e00 << 16) | (0xc910 >> 2),
 462        0x00000000,
 463        (0x6e00 << 16) | (0xc910 >> 2),
 464        0x00000000,
 465        (0x7e00 << 16) | (0xc910 >> 2),
 466        0x00000000,
 467        (0x8e00 << 16) | (0xc910 >> 2),
 468        0x00000000,
 469        (0x9e00 << 16) | (0xc910 >> 2),
 470        0x00000000,
 471        (0xae00 << 16) | (0xc910 >> 2),
 472        0x00000000,
 473        (0xbe00 << 16) | (0xc910 >> 2),
 474        0x00000000,
 475        (0x0e00 << 16) | (0xc99c >> 2),
 476        0x00000000,
 477        (0x0e00 << 16) | (0x9834 >> 2),
 478        0x00000000,
 479        (0x0000 << 16) | (0x30f00 >> 2),
 480        0x00000000,
 481        (0x0001 << 16) | (0x30f00 >> 2),
 482        0x00000000,
 483        (0x0000 << 16) | (0x30f04 >> 2),
 484        0x00000000,
 485        (0x0001 << 16) | (0x30f04 >> 2),
 486        0x00000000,
 487        (0x0000 << 16) | (0x30f08 >> 2),
 488        0x00000000,
 489        (0x0001 << 16) | (0x30f08 >> 2),
 490        0x00000000,
 491        (0x0000 << 16) | (0x30f0c >> 2),
 492        0x00000000,
 493        (0x0001 << 16) | (0x30f0c >> 2),
 494        0x00000000,
 495        (0x0600 << 16) | (0x9b7c >> 2),
 496        0x00000000,
 497        (0x0e00 << 16) | (0x8a14 >> 2),
 498        0x00000000,
 499        (0x0e00 << 16) | (0x8a18 >> 2),
 500        0x00000000,
 501        (0x0600 << 16) | (0x30a00 >> 2),
 502        0x00000000,
 503        (0x0e00 << 16) | (0x8bf0 >> 2),
 504        0x00000000,
 505        (0x0e00 << 16) | (0x8bcc >> 2),
 506        0x00000000,
 507        (0x0e00 << 16) | (0x8b24 >> 2),
 508        0x00000000,
 509        (0x0e00 << 16) | (0x30a04 >> 2),
 510        0x00000000,
 511        (0x0600 << 16) | (0x30a10 >> 2),
 512        0x00000000,
 513        (0x0600 << 16) | (0x30a14 >> 2),
 514        0x00000000,
 515        (0x0600 << 16) | (0x30a18 >> 2),
 516        0x00000000,
 517        (0x0600 << 16) | (0x30a2c >> 2),
 518        0x00000000,
 519        (0x0e00 << 16) | (0xc700 >> 2),
 520        0x00000000,
 521        (0x0e00 << 16) | (0xc704 >> 2),
 522        0x00000000,
 523        (0x0e00 << 16) | (0xc708 >> 2),
 524        0x00000000,
 525        (0x0e00 << 16) | (0xc768 >> 2),
 526        0x00000000,
 527        (0x0400 << 16) | (0xc770 >> 2),
 528        0x00000000,
 529        (0x0400 << 16) | (0xc774 >> 2),
 530        0x00000000,
 531        (0x0400 << 16) | (0xc778 >> 2),
 532        0x00000000,
 533        (0x0400 << 16) | (0xc77c >> 2),
 534        0x00000000,
 535        (0x0400 << 16) | (0xc780 >> 2),
 536        0x00000000,
 537        (0x0400 << 16) | (0xc784 >> 2),
 538        0x00000000,
 539        (0x0400 << 16) | (0xc788 >> 2),
 540        0x00000000,
 541        (0x0400 << 16) | (0xc78c >> 2),
 542        0x00000000,
 543        (0x0400 << 16) | (0xc798 >> 2),
 544        0x00000000,
 545        (0x0400 << 16) | (0xc79c >> 2),
 546        0x00000000,
 547        (0x0400 << 16) | (0xc7a0 >> 2),
 548        0x00000000,
 549        (0x0400 << 16) | (0xc7a4 >> 2),
 550        0x00000000,
 551        (0x0400 << 16) | (0xc7a8 >> 2),
 552        0x00000000,
 553        (0x0400 << 16) | (0xc7ac >> 2),
 554        0x00000000,
 555        (0x0400 << 16) | (0xc7b0 >> 2),
 556        0x00000000,
 557        (0x0400 << 16) | (0xc7b4 >> 2),
 558        0x00000000,
 559        (0x0e00 << 16) | (0x9100 >> 2),
 560        0x00000000,
 561        (0x0e00 << 16) | (0x3c010 >> 2),
 562        0x00000000,
 563        (0x0e00 << 16) | (0x92a8 >> 2),
 564        0x00000000,
 565        (0x0e00 << 16) | (0x92ac >> 2),
 566        0x00000000,
 567        (0x0e00 << 16) | (0x92b4 >> 2),
 568        0x00000000,
 569        (0x0e00 << 16) | (0x92b8 >> 2),
 570        0x00000000,
 571        (0x0e00 << 16) | (0x92bc >> 2),
 572        0x00000000,
 573        (0x0e00 << 16) | (0x92c0 >> 2),
 574        0x00000000,
 575        (0x0e00 << 16) | (0x92c4 >> 2),
 576        0x00000000,
 577        (0x0e00 << 16) | (0x92c8 >> 2),
 578        0x00000000,
 579        (0x0e00 << 16) | (0x92cc >> 2),
 580        0x00000000,
 581        (0x0e00 << 16) | (0x92d0 >> 2),
 582        0x00000000,
 583        (0x0e00 << 16) | (0x8c00 >> 2),
 584        0x00000000,
 585        (0x0e00 << 16) | (0x8c04 >> 2),
 586        0x00000000,
 587        (0x0e00 << 16) | (0x8c20 >> 2),
 588        0x00000000,
 589        (0x0e00 << 16) | (0x8c38 >> 2),
 590        0x00000000,
 591        (0x0e00 << 16) | (0x8c3c >> 2),
 592        0x00000000,
 593        (0x0e00 << 16) | (0xae00 >> 2),
 594        0x00000000,
 595        (0x0e00 << 16) | (0x9604 >> 2),
 596        0x00000000,
 597        (0x0e00 << 16) | (0xac08 >> 2),
 598        0x00000000,
 599        (0x0e00 << 16) | (0xac0c >> 2),
 600        0x00000000,
 601        (0x0e00 << 16) | (0xac10 >> 2),
 602        0x00000000,
 603        (0x0e00 << 16) | (0xac14 >> 2),
 604        0x00000000,
 605        (0x0e00 << 16) | (0xac58 >> 2),
 606        0x00000000,
 607        (0x0e00 << 16) | (0xac68 >> 2),
 608        0x00000000,
 609        (0x0e00 << 16) | (0xac6c >> 2),
 610        0x00000000,
 611        (0x0e00 << 16) | (0xac70 >> 2),
 612        0x00000000,
 613        (0x0e00 << 16) | (0xac74 >> 2),
 614        0x00000000,
 615        (0x0e00 << 16) | (0xac78 >> 2),
 616        0x00000000,
 617        (0x0e00 << 16) | (0xac7c >> 2),
 618        0x00000000,
 619        (0x0e00 << 16) | (0xac80 >> 2),
 620        0x00000000,
 621        (0x0e00 << 16) | (0xac84 >> 2),
 622        0x00000000,
 623        (0x0e00 << 16) | (0xac88 >> 2),
 624        0x00000000,
 625        (0x0e00 << 16) | (0xac8c >> 2),
 626        0x00000000,
 627        (0x0e00 << 16) | (0x970c >> 2),
 628        0x00000000,
 629        (0x0e00 << 16) | (0x9714 >> 2),
 630        0x00000000,
 631        (0x0e00 << 16) | (0x9718 >> 2),
 632        0x00000000,
 633        (0x0e00 << 16) | (0x971c >> 2),
 634        0x00000000,
 635        (0x0e00 << 16) | (0x31068 >> 2),
 636        0x00000000,
 637        (0x4e00 << 16) | (0x31068 >> 2),
 638        0x00000000,
 639        (0x5e00 << 16) | (0x31068 >> 2),
 640        0x00000000,
 641        (0x6e00 << 16) | (0x31068 >> 2),
 642        0x00000000,
 643        (0x7e00 << 16) | (0x31068 >> 2),
 644        0x00000000,
 645        (0x8e00 << 16) | (0x31068 >> 2),
 646        0x00000000,
 647        (0x9e00 << 16) | (0x31068 >> 2),
 648        0x00000000,
 649        (0xae00 << 16) | (0x31068 >> 2),
 650        0x00000000,
 651        (0xbe00 << 16) | (0x31068 >> 2),
 652        0x00000000,
 653        (0x0e00 << 16) | (0xcd10 >> 2),
 654        0x00000000,
 655        (0x0e00 << 16) | (0xcd14 >> 2),
 656        0x00000000,
 657        (0x0e00 << 16) | (0x88b0 >> 2),
 658        0x00000000,
 659        (0x0e00 << 16) | (0x88b4 >> 2),
 660        0x00000000,
 661        (0x0e00 << 16) | (0x88b8 >> 2),
 662        0x00000000,
 663        (0x0e00 << 16) | (0x88bc >> 2),
 664        0x00000000,
 665        (0x0400 << 16) | (0x89c0 >> 2),
 666        0x00000000,
 667        (0x0e00 << 16) | (0x88c4 >> 2),
 668        0x00000000,
 669        (0x0e00 << 16) | (0x88c8 >> 2),
 670        0x00000000,
 671        (0x0e00 << 16) | (0x88d0 >> 2),
 672        0x00000000,
 673        (0x0e00 << 16) | (0x88d4 >> 2),
 674        0x00000000,
 675        (0x0e00 << 16) | (0x88d8 >> 2),
 676        0x00000000,
 677        (0x0e00 << 16) | (0x8980 >> 2),
 678        0x00000000,
 679        (0x0e00 << 16) | (0x30938 >> 2),
 680        0x00000000,
 681        (0x0e00 << 16) | (0x3093c >> 2),
 682        0x00000000,
 683        (0x0e00 << 16) | (0x30940 >> 2),
 684        0x00000000,
 685        (0x0e00 << 16) | (0x89a0 >> 2),
 686        0x00000000,
 687        (0x0e00 << 16) | (0x30900 >> 2),
 688        0x00000000,
 689        (0x0e00 << 16) | (0x30904 >> 2),
 690        0x00000000,
 691        (0x0e00 << 16) | (0x89b4 >> 2),
 692        0x00000000,
 693        (0x0e00 << 16) | (0x3c210 >> 2),
 694        0x00000000,
 695        (0x0e00 << 16) | (0x3c214 >> 2),
 696        0x00000000,
 697        (0x0e00 << 16) | (0x3c218 >> 2),
 698        0x00000000,
 699        (0x0e00 << 16) | (0x8904 >> 2),
 700        0x00000000,
 701        0x5, /* NOTE(review): five packed words follow; presumably a count - confirm */
 702        (0x0e00 << 16) | (0x8c28 >> 2),
 703        (0x0e00 << 16) | (0x8c2c >> 2),
 704        (0x0e00 << 16) | (0x8c30 >> 2),
 705        (0x0e00 << 16) | (0x8c34 >> 2),
 706        (0x0e00 << 16) | (0x9600 >> 2),
 707};
 708
 709static const u32 kalindi_rlc_save_restore_register_list[] =
 710{
 711        (0x0e00 << 16) | (0xc12c >> 2),
 712        0x00000000,
 713        (0x0e00 << 16) | (0xc140 >> 2),
 714        0x00000000,
 715        (0x0e00 << 16) | (0xc150 >> 2),
 716        0x00000000,
 717        (0x0e00 << 16) | (0xc15c >> 2),
 718        0x00000000,
 719        (0x0e00 << 16) | (0xc168 >> 2),
 720        0x00000000,
 721        (0x0e00 << 16) | (0xc170 >> 2),
 722        0x00000000,
 723        (0x0e00 << 16) | (0xc204 >> 2),
 724        0x00000000,
 725        (0x0e00 << 16) | (0xc2b4 >> 2),
 726        0x00000000,
 727        (0x0e00 << 16) | (0xc2b8 >> 2),
 728        0x00000000,
 729        (0x0e00 << 16) | (0xc2bc >> 2),
 730        0x00000000,
 731        (0x0e00 << 16) | (0xc2c0 >> 2),
 732        0x00000000,
 733        (0x0e00 << 16) | (0x8228 >> 2),
 734        0x00000000,
 735        (0x0e00 << 16) | (0x829c >> 2),
 736        0x00000000,
 737        (0x0e00 << 16) | (0x869c >> 2),
 738        0x00000000,
 739        (0x0600 << 16) | (0x98f4 >> 2),
 740        0x00000000,
 741        (0x0e00 << 16) | (0x98f8 >> 2),
 742        0x00000000,
 743        (0x0e00 << 16) | (0x9900 >> 2),
 744        0x00000000,
 745        (0x0e00 << 16) | (0xc260 >> 2),
 746        0x00000000,
 747        (0x0e00 << 16) | (0x90e8 >> 2),
 748        0x00000000,
 749        (0x0e00 << 16) | (0x3c000 >> 2),
 750        0x00000000,
 751        (0x0e00 << 16) | (0x3c00c >> 2),
 752        0x00000000,
 753        (0x0e00 << 16) | (0x8c1c >> 2),
 754        0x00000000,
 755        (0x0e00 << 16) | (0x9700 >> 2),
 756        0x00000000,
 757        (0x0e00 << 16) | (0xcd20 >> 2),
 758        0x00000000,
 759        (0x4e00 << 16) | (0xcd20 >> 2),
 760        0x00000000,
 761        (0x5e00 << 16) | (0xcd20 >> 2),
 762        0x00000000,
 763        (0x6e00 << 16) | (0xcd20 >> 2),
 764        0x00000000,
 765        (0x7e00 << 16) | (0xcd20 >> 2),
 766        0x00000000,
 767        (0x0e00 << 16) | (0x89bc >> 2),
 768        0x00000000,
 769        (0x0e00 << 16) | (0x8900 >> 2),
 770        0x00000000,
 771        0x3,
 772        (0x0e00 << 16) | (0xc130 >> 2),
 773        0x00000000,
 774        (0x0e00 << 16) | (0xc134 >> 2),
 775        0x00000000,
 776        (0x0e00 << 16) | (0xc1fc >> 2),
 777        0x00000000,
 778        (0x0e00 << 16) | (0xc208 >> 2),
 779        0x00000000,
 780        (0x0e00 << 16) | (0xc264 >> 2),
 781        0x00000000,
 782        (0x0e00 << 16) | (0xc268 >> 2),
 783        0x00000000,
 784        (0x0e00 << 16) | (0xc26c >> 2),
 785        0x00000000,
 786        (0x0e00 << 16) | (0xc270 >> 2),
 787        0x00000000,
 788        (0x0e00 << 16) | (0xc274 >> 2),
 789        0x00000000,
 790        (0x0e00 << 16) | (0xc28c >> 2),
 791        0x00000000,
 792        (0x0e00 << 16) | (0xc290 >> 2),
 793        0x00000000,
 794        (0x0e00 << 16) | (0xc294 >> 2),
 795        0x00000000,
 796        (0x0e00 << 16) | (0xc298 >> 2),
 797        0x00000000,
 798        (0x0e00 << 16) | (0xc2a0 >> 2),
 799        0x00000000,
 800        (0x0e00 << 16) | (0xc2a4 >> 2),
 801        0x00000000,
 802        (0x0e00 << 16) | (0xc2a8 >> 2),
 803        0x00000000,
 804        (0x0e00 << 16) | (0xc2ac >> 2),
 805        0x00000000,
 806        (0x0e00 << 16) | (0x301d0 >> 2),
 807        0x00000000,
 808        (0x0e00 << 16) | (0x30238 >> 2),
 809        0x00000000,
 810        (0x0e00 << 16) | (0x30250 >> 2),
 811        0x00000000,
 812        (0x0e00 << 16) | (0x30254 >> 2),
 813        0x00000000,
 814        (0x0e00 << 16) | (0x30258 >> 2),
 815        0x00000000,
 816        (0x0e00 << 16) | (0x3025c >> 2),
 817        0x00000000,
 818        (0x4e00 << 16) | (0xc900 >> 2),
 819        0x00000000,
 820        (0x5e00 << 16) | (0xc900 >> 2),
 821        0x00000000,
 822        (0x6e00 << 16) | (0xc900 >> 2),
 823        0x00000000,
 824        (0x7e00 << 16) | (0xc900 >> 2),
 825        0x00000000,
 826        (0x4e00 << 16) | (0xc904 >> 2),
 827        0x00000000,
 828        (0x5e00 << 16) | (0xc904 >> 2),
 829        0x00000000,
 830        (0x6e00 << 16) | (0xc904 >> 2),
 831        0x00000000,
 832        (0x7e00 << 16) | (0xc904 >> 2),
 833        0x00000000,
 834        (0x4e00 << 16) | (0xc908 >> 2),
 835        0x00000000,
 836        (0x5e00 << 16) | (0xc908 >> 2),
 837        0x00000000,
 838        (0x6e00 << 16) | (0xc908 >> 2),
 839        0x00000000,
 840        (0x7e00 << 16) | (0xc908 >> 2),
 841        0x00000000,
 842        (0x4e00 << 16) | (0xc90c >> 2),
 843        0x00000000,
 844        (0x5e00 << 16) | (0xc90c >> 2),
 845        0x00000000,
 846        (0x6e00 << 16) | (0xc90c >> 2),
 847        0x00000000,
 848        (0x7e00 << 16) | (0xc90c >> 2),
 849        0x00000000,
 850        (0x4e00 << 16) | (0xc910 >> 2),
 851        0x00000000,
 852        (0x5e00 << 16) | (0xc910 >> 2),
 853        0x00000000,
 854        (0x6e00 << 16) | (0xc910 >> 2),
 855        0x00000000,
 856        (0x7e00 << 16) | (0xc910 >> 2),
 857        0x00000000,
 858        (0x0e00 << 16) | (0xc99c >> 2),
 859        0x00000000,
 860        (0x0e00 << 16) | (0x9834 >> 2),
 861        0x00000000,
 862        (0x0000 << 16) | (0x30f00 >> 2),
 863        0x00000000,
 864        (0x0000 << 16) | (0x30f04 >> 2),
 865        0x00000000,
 866        (0x0000 << 16) | (0x30f08 >> 2),
 867        0x00000000,
 868        (0x0000 << 16) | (0x30f0c >> 2),
 869        0x00000000,
 870        (0x0600 << 16) | (0x9b7c >> 2),
 871        0x00000000,
 872        (0x0e00 << 16) | (0x8a14 >> 2),
 873        0x00000000,
 874        (0x0e00 << 16) | (0x8a18 >> 2),
 875        0x00000000,
 876        (0x0600 << 16) | (0x30a00 >> 2),
 877        0x00000000,
 878        (0x0e00 << 16) | (0x8bf0 >> 2),
 879        0x00000000,
 880        (0x0e00 << 16) | (0x8bcc >> 2),
 881        0x00000000,
 882        (0x0e00 << 16) | (0x8b24 >> 2),
 883        0x00000000,
 884        (0x0e00 << 16) | (0x30a04 >> 2),
 885        0x00000000,
 886        (0x0600 << 16) | (0x30a10 >> 2),
 887        0x00000000,
 888        (0x0600 << 16) | (0x30a14 >> 2),
 889        0x00000000,
 890        (0x0600 << 16) | (0x30a18 >> 2),
 891        0x00000000,
 892        (0x0600 << 16) | (0x30a2c >> 2),
 893        0x00000000,
 894        (0x0e00 << 16) | (0xc700 >> 2),
 895        0x00000000,
 896        (0x0e00 << 16) | (0xc704 >> 2),
 897        0x00000000,
 898        (0x0e00 << 16) | (0xc708 >> 2),
 899        0x00000000,
 900        (0x0e00 << 16) | (0xc768 >> 2),
 901        0x00000000,
 902        (0x0400 << 16) | (0xc770 >> 2),
 903        0x00000000,
 904        (0x0400 << 16) | (0xc774 >> 2),
 905        0x00000000,
 906        (0x0400 << 16) | (0xc798 >> 2),
 907        0x00000000,
 908        (0x0400 << 16) | (0xc79c >> 2),
 909        0x00000000,
 910        (0x0e00 << 16) | (0x9100 >> 2),
 911        0x00000000,
 912        (0x0e00 << 16) | (0x3c010 >> 2),
 913        0x00000000,
 914        (0x0e00 << 16) | (0x8c00 >> 2),
 915        0x00000000,
 916        (0x0e00 << 16) | (0x8c04 >> 2),
 917        0x00000000,
 918        (0x0e00 << 16) | (0x8c20 >> 2),
 919        0x00000000,
 920        (0x0e00 << 16) | (0x8c38 >> 2),
 921        0x00000000,
 922        (0x0e00 << 16) | (0x8c3c >> 2),
 923        0x00000000,
 924        (0x0e00 << 16) | (0xae00 >> 2),
 925        0x00000000,
 926        (0x0e00 << 16) | (0x9604 >> 2),
 927        0x00000000,
 928        (0x0e00 << 16) | (0xac08 >> 2),
 929        0x00000000,
 930        (0x0e00 << 16) | (0xac0c >> 2),
 931        0x00000000,
 932        (0x0e00 << 16) | (0xac10 >> 2),
 933        0x00000000,
 934        (0x0e00 << 16) | (0xac14 >> 2),
 935        0x00000000,
 936        (0x0e00 << 16) | (0xac58 >> 2),
 937        0x00000000,
 938        (0x0e00 << 16) | (0xac68 >> 2),
 939        0x00000000,
 940        (0x0e00 << 16) | (0xac6c >> 2),
 941        0x00000000,
 942        (0x0e00 << 16) | (0xac70 >> 2),
 943        0x00000000,
 944        (0x0e00 << 16) | (0xac74 >> 2),
 945        0x00000000,
 946        (0x0e00 << 16) | (0xac78 >> 2),
 947        0x00000000,
 948        (0x0e00 << 16) | (0xac7c >> 2),
 949        0x00000000,
 950        (0x0e00 << 16) | (0xac80 >> 2),
 951        0x00000000,
 952        (0x0e00 << 16) | (0xac84 >> 2),
 953        0x00000000,
 954        (0x0e00 << 16) | (0xac88 >> 2),
 955        0x00000000,
 956        (0x0e00 << 16) | (0xac8c >> 2),
 957        0x00000000,
 958        (0x0e00 << 16) | (0x970c >> 2),
 959        0x00000000,
 960        (0x0e00 << 16) | (0x9714 >> 2),
 961        0x00000000,
 962        (0x0e00 << 16) | (0x9718 >> 2),
 963        0x00000000,
 964        (0x0e00 << 16) | (0x971c >> 2),
 965        0x00000000,
 966        (0x0e00 << 16) | (0x31068 >> 2),
 967        0x00000000,
 968        (0x4e00 << 16) | (0x31068 >> 2),
 969        0x00000000,
 970        (0x5e00 << 16) | (0x31068 >> 2),
 971        0x00000000,
 972        (0x6e00 << 16) | (0x31068 >> 2),
 973        0x00000000,
 974        (0x7e00 << 16) | (0x31068 >> 2),
 975        0x00000000,
 976        (0x0e00 << 16) | (0xcd10 >> 2),
 977        0x00000000,
 978        (0x0e00 << 16) | (0xcd14 >> 2),
 979        0x00000000,
 980        (0x0e00 << 16) | (0x88b0 >> 2),
 981        0x00000000,
 982        (0x0e00 << 16) | (0x88b4 >> 2),
 983        0x00000000,
 984        (0x0e00 << 16) | (0x88b8 >> 2),
 985        0x00000000,
 986        (0x0e00 << 16) | (0x88bc >> 2),
 987        0x00000000,
 988        (0x0400 << 16) | (0x89c0 >> 2),
 989        0x00000000,
 990        (0x0e00 << 16) | (0x88c4 >> 2),
 991        0x00000000,
 992        (0x0e00 << 16) | (0x88c8 >> 2),
 993        0x00000000,
 994        (0x0e00 << 16) | (0x88d0 >> 2),
 995        0x00000000,
 996        (0x0e00 << 16) | (0x88d4 >> 2),
 997        0x00000000,
 998        (0x0e00 << 16) | (0x88d8 >> 2),
 999        0x00000000,
1000        (0x0e00 << 16) | (0x8980 >> 2),
1001        0x00000000,
1002        (0x0e00 << 16) | (0x30938 >> 2),
1003        0x00000000,
1004        (0x0e00 << 16) | (0x3093c >> 2),
1005        0x00000000,
1006        (0x0e00 << 16) | (0x30940 >> 2),
1007        0x00000000,
1008        (0x0e00 << 16) | (0x89a0 >> 2),
1009        0x00000000,
1010        (0x0e00 << 16) | (0x30900 >> 2),
1011        0x00000000,
1012        (0x0e00 << 16) | (0x30904 >> 2),
1013        0x00000000,
1014        (0x0e00 << 16) | (0x89b4 >> 2),
1015        0x00000000,
1016        (0x0e00 << 16) | (0x3e1fc >> 2),
1017        0x00000000,
1018        (0x0e00 << 16) | (0x3c210 >> 2),
1019        0x00000000,
1020        (0x0e00 << 16) | (0x3c214 >> 2),
1021        0x00000000,
1022        (0x0e00 << 16) | (0x3c218 >> 2),
1023        0x00000000,
1024        (0x0e00 << 16) | (0x8904 >> 2),
1025        0x00000000,
1026        0x5,
1027        (0x0e00 << 16) | (0x8c28 >> 2),
1028        (0x0e00 << 16) | (0x8c2c >> 2),
1029        (0x0e00 << 16) | (0x8c30 >> 2),
1030        (0x0e00 << 16) | (0x8c34 >> 2),
1031        (0x0e00 << 16) | (0x9600 >> 2),
1032};
1033
/*
 * Bonaire golden SPM setting; the last table programmed for CHIP_BONAIRE
 * by cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 bonaire_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1038
/*
 * Bonaire golden "common" settings; programmed third for CHIP_BONAIRE by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 bonaire_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1046
/*
 * Bonaire golden register settings; programmed second for CHIP_BONAIRE by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 bonaire_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032,
};
1091
/*
 * Bonaire mgcg/cgcg init table; the first table programmed for CHIP_BONAIRE
 * by cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 * Several offsets (0x30800, 0x55e4, 0x3c000) appear more than once, so the
 * entry order is kept exactly as it was.
 */
static const u32 bonaire_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0a8: the same five values repeated seven times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1177
/*
 * Spectre golden SPM setting; the last table programmed for CHIP_KAVERI by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 spectre_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1182
/*
 * Spectre golden "common" settings; programmed third for CHIP_KAVERI by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 spectre_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1190
/*
 * Spectre golden register settings; programmed second for CHIP_KAVERI by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 spectre_golden_registers[] = {
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001,
};
1219
/*
 * Spectre mgcg/cgcg init table; the first table programmed for CHIP_KAVERI
 * by cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 * Several offsets (0x30800, 0x55e4, 0x3c000) appear more than once, so the
 * entry order is kept exactly as it was.
 */
static const u32 spectre_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0bc: the same five values repeated eight times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1310
/*
 * Kalindi golden SPM setting; the last table programmed for both
 * CHIP_KABINI and CHIP_MULLINS by cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 kalindi_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1315
/*
 * Kalindi golden "common" settings; programmed third for both CHIP_KABINI
 * and CHIP_MULLINS by cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 kalindi_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1323
/*
 * Kalindi golden register settings; programmed second for CHIP_KABINI by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 kalindi_golden_registers[] = {
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000,
};
1357
/*
 * Kalindi mgcg/cgcg init table; the first table programmed for both
 * CHIP_KABINI and CHIP_MULLINS by cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 * Several offsets (0x30800, 0x55e4, 0x3c000) appear more than once, so the
 * entry order is kept exactly as it was.
 */
static const u32 kalindi_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c044: the same five values repeated twice */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1416
/*
 * Hawaii golden SPM setting; the last table programmed for CHIP_HAWAII by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 hawaii_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1421
/*
 * Hawaii golden "common" settings; programmed third for CHIP_HAWAII by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 hawaii_golden_common_registers[] = {
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003,
};
1430
/*
 * Hawaii golden register settings; programmed second for CHIP_HAWAII by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 hawaii_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000,
};
1470
/*
 * Hawaii mgcg/cgcg init table; the first table programmed for CHIP_HAWAII
 * by cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 * Several offsets (0x30800, 0x55e4) appear more than once, so the entry
 * order is kept exactly as it was.
 */
static const u32 hawaii_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0f8: the same five values repeated eleven times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1581
/*
 * Godavari golden register settings; programmed second for CHIP_MULLINS by
 * cik_init_golden_registers().
 *
 * Three words per register -- presumably (offset, mask, value) as consumed
 * by radeon_program_register_sequence(); confirm against that helper.
 */
static const u32 godavari_golden_registers[] = {
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/*
	 * Was 0x98302, which is not dword aligned.  The other golden tables
	 * in this file pair this exact mask/value with offset 0x9834, so the
	 * old offset was a typo for it.
	 */
	0x9834, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000,
};
1617
1618
1619static void cik_init_golden_registers(struct radeon_device *rdev)
1620{
1621        switch (rdev->family) {
1622        case CHIP_BONAIRE:
1623                radeon_program_register_sequence(rdev,
1624                                                 bonaire_mgcg_cgcg_init,
1625                                                 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1626                radeon_program_register_sequence(rdev,
1627                                                 bonaire_golden_registers,
1628                                                 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1629                radeon_program_register_sequence(rdev,
1630                                                 bonaire_golden_common_registers,
1631                                                 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1632                radeon_program_register_sequence(rdev,
1633                                                 bonaire_golden_spm_registers,
1634                                                 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1635                break;
1636        case CHIP_KABINI:
1637                radeon_program_register_sequence(rdev,
1638                                                 kalindi_mgcg_cgcg_init,
1639                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1640                radeon_program_register_sequence(rdev,
1641                                                 kalindi_golden_registers,
1642                                                 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1643                radeon_program_register_sequence(rdev,
1644                                                 kalindi_golden_common_registers,
1645                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1646                radeon_program_register_sequence(rdev,
1647                                                 kalindi_golden_spm_registers,
1648                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1649                break;
1650        case CHIP_MULLINS:
1651                radeon_program_register_sequence(rdev,
1652                                                 kalindi_mgcg_cgcg_init,
1653                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1654                radeon_program_register_sequence(rdev,
1655                                                 godavari_golden_registers,
1656                                                 (const u32)ARRAY_SIZE(godavari_golden_registers));
1657                radeon_program_register_sequence(rdev,
1658                                                 kalindi_golden_common_registers,
1659                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1660                radeon_program_register_sequence(rdev,
1661                                                 kalindi_golden_spm_registers,
1662                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1663                break;
1664        case CHIP_KAVERI:
1665                radeon_program_register_sequence(rdev,
1666                                                 spectre_mgcg_cgcg_init,
1667                                                 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1668                radeon_program_register_sequence(rdev,
1669                                                 spectre_golden_registers,
1670                                                 (const u32)ARRAY_SIZE(spectre_golden_registers));
1671                radeon_program_register_sequence(rdev,
1672                                                 spectre_golden_common_registers,
1673                                                 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1674                radeon_program_register_sequence(rdev,
1675                                                 spectre_golden_spm_registers,
1676                                                 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1677                break;
1678        case CHIP_HAWAII:
1679                radeon_program_register_sequence(rdev,
1680                                                 hawaii_mgcg_cgcg_init,
1681                                                 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1682                radeon_program_register_sequence(rdev,
1683                                                 hawaii_golden_registers,
1684                                                 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1685                radeon_program_register_sequence(rdev,
1686                                                 hawaii_golden_common_registers,
1687                                                 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1688                radeon_program_register_sequence(rdev,
1689                                                 hawaii_golden_spm_registers,
1690                                                 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1691                break;
1692        default:
1693                break;
1694        }
1695}
1696
1697/**
1698 * cik_get_xclk - get the xclk
1699 *
1700 * @rdev: radeon_device pointer
1701 *
1702 * Returns the reference clock used by the gfx engine
1703 * (CIK).
1704 */
1705u32 cik_get_xclk(struct radeon_device *rdev)
1706{
1707        u32 reference_clock = rdev->clock.spll.reference_freq;
1708
1709        if (rdev->flags & RADEON_IS_IGP) {
1710                if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1711                        return reference_clock / 2;
1712        } else {
1713                if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1714                        return reference_clock / 4;
1715        }
1716        return reference_clock;
1717}
1718
1719/**
1720 * cik_mm_rdoorbell - read a doorbell dword
1721 *
1722 * @rdev: radeon_device pointer
1723 * @index: doorbell index
1724 *
1725 * Returns the value in the doorbell aperture at the
1726 * requested doorbell index (CIK).
1727 */
1728u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1729{
1730        if (index < rdev->doorbell.num_doorbells) {
1731                return readl(rdev->doorbell.ptr + index);
1732        } else {
1733                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1734                return 0;
1735        }
1736}
1737
1738/**
1739 * cik_mm_wdoorbell - write a doorbell dword
1740 *
1741 * @rdev: radeon_device pointer
1742 * @index: doorbell index
1743 * @v: value to write
1744 *
1745 * Writes @v to the doorbell aperture at the
1746 * requested doorbell index (CIK).
1747 */
1748void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1749{
1750        if (index < rdev->doorbell.num_doorbells) {
1751                writel(v, rdev->doorbell.ptr + index);
1752        } else {
1753                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1754        }
1755}
1756
1757#define BONAIRE_IO_MC_REGS_SIZE 36
1758
1759static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1760{
1761        {0x00000070, 0x04400000},
1762        {0x00000071, 0x80c01803},
1763        {0x00000072, 0x00004004},
1764        {0x00000073, 0x00000100},
1765        {0x00000074, 0x00ff0000},
1766        {0x00000075, 0x34000000},
1767        {0x00000076, 0x08000014},
1768        {0x00000077, 0x00cc08ec},
1769        {0x00000078, 0x00000400},
1770        {0x00000079, 0x00000000},
1771        {0x0000007a, 0x04090000},
1772        {0x0000007c, 0x00000000},
1773        {0x0000007e, 0x4408a8e8},
1774        {0x0000007f, 0x00000304},
1775        {0x00000080, 0x00000000},
1776        {0x00000082, 0x00000001},
1777        {0x00000083, 0x00000002},
1778        {0x00000084, 0xf3e4f400},
1779        {0x00000085, 0x052024e3},
1780        {0x00000087, 0x00000000},
1781        {0x00000088, 0x01000000},
1782        {0x0000008a, 0x1c0a0000},
1783        {0x0000008b, 0xff010000},
1784        {0x0000008d, 0xffffefff},
1785        {0x0000008e, 0xfff3efff},
1786        {0x0000008f, 0xfff3efbf},
1787        {0x00000092, 0xf7ffffff},
1788        {0x00000093, 0xffffff7f},
1789        {0x00000095, 0x00101101},
1790        {0x00000096, 0x00000fff},
1791        {0x00000097, 0x00116fff},
1792        {0x00000098, 0x60010000},
1793        {0x00000099, 0x10010000},
1794        {0x0000009a, 0x00006000},
1795        {0x0000009b, 0x00001000},
1796        {0x0000009f, 0x00b48000}
1797};
1798
1799#define HAWAII_IO_MC_REGS_SIZE 22
1800
1801static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1802{
1803        {0x0000007d, 0x40000000},
1804        {0x0000007e, 0x40180304},
1805        {0x0000007f, 0x0000ff00},
1806        {0x00000081, 0x00000000},
1807        {0x00000083, 0x00000800},
1808        {0x00000086, 0x00000000},
1809        {0x00000087, 0x00000100},
1810        {0x00000088, 0x00020100},
1811        {0x00000089, 0x00000000},
1812        {0x0000008b, 0x00040000},
1813        {0x0000008c, 0x00000100},
1814        {0x0000008e, 0xff010000},
1815        {0x00000090, 0xffffefff},
1816        {0x00000091, 0xfff3efff},
1817        {0x00000092, 0xfff3efbf},
1818        {0x00000093, 0xf7ffffff},
1819        {0x00000094, 0xffffff7f},
1820        {0x00000095, 0x00000fff},
1821        {0x00000096, 0x00116fff},
1822        {0x00000097, 0x60010000},
1823        {0x00000098, 0x10010000},
1824        {0x0000009f, 0x00c79000}
1825};
1826
1827
1828/**
1829 * cik_srbm_select - select specific register instances
1830 *
1831 * @rdev: radeon_device pointer
1832 * @me: selected ME (micro engine)
1833 * @pipe: pipe
1834 * @queue: queue
1835 * @vmid: VMID
1836 *
1837 * Switches the currently active registers instances.  Some
1838 * registers are instanced per VMID, others are instanced per
1839 * me/pipe/queue combination.
1840 */
1841static void cik_srbm_select(struct radeon_device *rdev,
1842                            u32 me, u32 pipe, u32 queue, u32 vmid)
1843{
1844        u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1845                             MEID(me & 0x3) |
1846                             VMID(vmid & 0xf) |
1847                             QUEUEID(queue & 0x7));
1848        WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1849}
1850
1851/* ucode loading */
1852/**
1853 * ci_mc_load_microcode - load MC ucode into the hw
1854 *
1855 * @rdev: radeon_device pointer
1856 *
1857 * Load the GDDR MC ucode into the hw (CIK).
1858 * Returns 0 on success, error on failure.
1859 */
1860int ci_mc_load_microcode(struct radeon_device *rdev)
1861{
1862        const __be32 *fw_data = NULL;
1863        const __le32 *new_fw_data = NULL;
1864        u32 running, tmp;
1865        u32 *io_mc_regs = NULL;
1866        const __le32 *new_io_mc_regs = NULL;
1867        int i, regs_size, ucode_size;
1868
1869        if (!rdev->mc_fw)
1870                return -EINVAL;
1871
1872        if (rdev->new_fw) {
1873                const struct mc_firmware_header_v1_0 *hdr =
1874                        (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1875
1876                radeon_ucode_print_mc_hdr(&hdr->header);
1877
1878                regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1879                new_io_mc_regs = (const __le32 *)
1880                        (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1881                ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1882                new_fw_data = (const __le32 *)
1883                        (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884        } else {
1885                ucode_size = rdev->mc_fw->size / 4;
1886
1887                switch (rdev->family) {
1888                case CHIP_BONAIRE:
1889                        io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1890                        regs_size = BONAIRE_IO_MC_REGS_SIZE;
1891                        break;
1892                case CHIP_HAWAII:
1893                        io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1894                        regs_size = HAWAII_IO_MC_REGS_SIZE;
1895                        break;
1896                default:
1897                        return -EINVAL;
1898                }
1899                fw_data = (const __be32 *)rdev->mc_fw->data;
1900        }
1901
1902        running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1903
1904        if (running == 0) {
1905                /* reset the engine and set to writable */
1906                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1907                WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1908
1909                /* load mc io regs */
1910                for (i = 0; i < regs_size; i++) {
1911                        if (rdev->new_fw) {
1912                                WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1913                                WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1914                        } else {
1915                                WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1916                                WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1917                        }
1918                }
1919
1920                tmp = RREG32(MC_SEQ_MISC0);
1921                if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1922                        WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1923                        WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1924                        WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1925                        WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1926                }
1927
1928                /* load the MC ucode */
1929                for (i = 0; i < ucode_size; i++) {
1930                        if (rdev->new_fw)
1931                                WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1932                        else
1933                                WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1934                }
1935
1936                /* put the engine back into the active state */
1937                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1938                WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1939                WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1940
1941                /* wait for training to complete */
1942                for (i = 0; i < rdev->usec_timeout; i++) {
1943                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1944                                break;
1945                        udelay(1);
1946                }
1947                for (i = 0; i < rdev->usec_timeout; i++) {
1948                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1949                                break;
1950                        udelay(1);
1951                }
1952        }
1953
1954        return 0;
1955}
1956
1957/**
1958 * cik_init_microcode - load ucode images from disk
1959 *
1960 * @rdev: radeon_device pointer
1961 *
1962 * Use the firmware interface to load the ucode images into
1963 * the driver (not loaded into hw).
1964 * Returns 0 on success, error on failure.
1965 */
1966static int cik_init_microcode(struct radeon_device *rdev)
1967{
1968        const char *chip_name;
1969        const char *new_chip_name;
1970        size_t pfp_req_size, me_req_size, ce_req_size,
1971                mec_req_size, rlc_req_size, mc_req_size = 0,
1972                sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1973        char fw_name[30];
1974        int new_fw = 0;
1975        int err;
1976        int num_fw;
1977        bool new_smc = false;
1978
1979        DRM_DEBUG("\n");
1980
1981        switch (rdev->family) {
1982        case CHIP_BONAIRE:
1983                chip_name = "BONAIRE";
1984                if ((rdev->pdev->revision == 0x80) ||
1985                    (rdev->pdev->revision == 0x81) ||
1986                    (rdev->pdev->device == 0x665f))
1987                        new_smc = true;
1988                new_chip_name = "bonaire";
1989                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990                me_req_size = CIK_ME_UCODE_SIZE * 4;
1991                ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994                mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1995                mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1996                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997                smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1998                num_fw = 8;
1999                break;
2000        case CHIP_HAWAII:
2001                chip_name = "HAWAII";
2002                if (rdev->pdev->revision == 0x80)
2003                        new_smc = true;
2004                new_chip_name = "hawaii";
2005                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2006                me_req_size = CIK_ME_UCODE_SIZE * 4;
2007                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2008                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2009                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2010                mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2011                mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2012                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013                smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2014                num_fw = 8;
2015                break;
2016        case CHIP_KAVERI:
2017                chip_name = "KAVERI";
2018                new_chip_name = "kaveri";
2019                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020                me_req_size = CIK_ME_UCODE_SIZE * 4;
2021                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023                rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2024                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025                num_fw = 7;
2026                break;
2027        case CHIP_KABINI:
2028                chip_name = "KABINI";
2029                new_chip_name = "kabini";
2030                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031                me_req_size = CIK_ME_UCODE_SIZE * 4;
2032                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034                rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2035                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036                num_fw = 6;
2037                break;
2038        case CHIP_MULLINS:
2039                chip_name = "MULLINS";
2040                new_chip_name = "mullins";
2041                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2042                me_req_size = CIK_ME_UCODE_SIZE * 4;
2043                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2044                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2045                rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2046                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2047                num_fw = 6;
2048                break;
2049        default: BUG();
2050        }
2051
2052        DRM_INFO("Loading %s Microcode\n", new_chip_name);
2053
2054        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2055        err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2056        if (err) {
2057                snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2058                err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2059                if (err)
2060                        goto out;
2061                if (rdev->pfp_fw->size != pfp_req_size) {
2062                        pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2063                               rdev->pfp_fw->size, fw_name);
2064                        err = -EINVAL;
2065                        goto out;
2066                }
2067        } else {
2068                err = radeon_ucode_validate(rdev->pfp_fw);
2069                if (err) {
2070                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2071                               fw_name);
2072                        goto out;
2073                } else {
2074                        new_fw++;
2075                }
2076        }
2077
2078        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2079        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2080        if (err) {
2081                snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2082                err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2083                if (err)
2084                        goto out;
2085                if (rdev->me_fw->size != me_req_size) {
2086                        pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2087                               rdev->me_fw->size, fw_name);
2088                        err = -EINVAL;
2089                }
2090        } else {
2091                err = radeon_ucode_validate(rdev->me_fw);
2092                if (err) {
2093                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2094                               fw_name);
2095                        goto out;
2096                } else {
2097                        new_fw++;
2098                }
2099        }
2100
2101        snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2102        err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2103        if (err) {
2104                snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2105                err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2106                if (err)
2107                        goto out;
2108                if (rdev->ce_fw->size != ce_req_size) {
2109                        pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2110                               rdev->ce_fw->size, fw_name);
2111                        err = -EINVAL;
2112                }
2113        } else {
2114                err = radeon_ucode_validate(rdev->ce_fw);
2115                if (err) {
2116                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2117                               fw_name);
2118                        goto out;
2119                } else {
2120                        new_fw++;
2121                }
2122        }
2123
2124        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2125        err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126        if (err) {
2127                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2128                err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2129                if (err)
2130                        goto out;
2131                if (rdev->mec_fw->size != mec_req_size) {
2132                        pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133                               rdev->mec_fw->size, fw_name);
2134                        err = -EINVAL;
2135                }
2136        } else {
2137                err = radeon_ucode_validate(rdev->mec_fw);
2138                if (err) {
2139                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140                               fw_name);
2141                        goto out;
2142                } else {
2143                        new_fw++;
2144                }
2145        }
2146
2147        if (rdev->family == CHIP_KAVERI) {
2148                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2149                err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150                if (err) {
2151                        goto out;
2152                } else {
2153                        err = radeon_ucode_validate(rdev->mec2_fw);
2154                        if (err) {
2155                                goto out;
2156                        } else {
2157                                new_fw++;
2158                        }
2159                }
2160        }
2161
2162        snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2163        err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164        if (err) {
2165                snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2166                err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167                if (err)
2168                        goto out;
2169                if (rdev->rlc_fw->size != rlc_req_size) {
2170                        pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2171                               rdev->rlc_fw->size, fw_name);
2172                        err = -EINVAL;
2173                }
2174        } else {
2175                err = radeon_ucode_validate(rdev->rlc_fw);
2176                if (err) {
2177                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2178                               fw_name);
2179                        goto out;
2180                } else {
2181                        new_fw++;
2182                }
2183        }
2184
2185        snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2186        err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2187        if (err) {
2188                snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2189                err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2190                if (err)
2191                        goto out;
2192                if (rdev->sdma_fw->size != sdma_req_size) {
2193                        pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2194                               rdev->sdma_fw->size, fw_name);
2195                        err = -EINVAL;
2196                }
2197        } else {
2198                err = radeon_ucode_validate(rdev->sdma_fw);
2199                if (err) {
2200                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2201                               fw_name);
2202                        goto out;
2203                } else {
2204                        new_fw++;
2205                }
2206        }
2207
2208        /* No SMC, MC ucode on APUs */
2209        if (!(rdev->flags & RADEON_IS_IGP)) {
2210                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2211                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212                if (err) {
2213                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2214                        err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215                        if (err) {
2216                                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2217                                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218                                if (err)
2219                                        goto out;
2220                        }
2221                        if ((rdev->mc_fw->size != mc_req_size) &&
2222                            (rdev->mc_fw->size != mc2_req_size)){
2223                                pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2224                                       rdev->mc_fw->size, fw_name);
2225                                err = -EINVAL;
2226                        }
2227                        DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2228                } else {
2229                        err = radeon_ucode_validate(rdev->mc_fw);
2230                        if (err) {
2231                                pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2232                                       fw_name);
2233                                goto out;
2234                        } else {
2235                                new_fw++;
2236                        }
2237                }
2238
2239                if (new_smc)
2240                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2241                else
2242                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2243                err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2244                if (err) {
2245                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2246                        err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247                        if (err) {
2248                                pr_err("smc: error loading firmware \"%s\"\n",
2249                                       fw_name);
2250                                release_firmware(rdev->smc_fw);
2251                                rdev->smc_fw = NULL;
2252                                err = 0;
2253                        } else if (rdev->smc_fw->size != smc_req_size) {
2254                                pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2255                                       rdev->smc_fw->size, fw_name);
2256                                err = -EINVAL;
2257                        }
2258                } else {
2259                        err = radeon_ucode_validate(rdev->smc_fw);
2260                        if (err) {
2261                                pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2262                                       fw_name);
2263                                goto out;
2264                        } else {
2265                                new_fw++;
2266                        }
2267                }
2268        }
2269
2270        if (new_fw == 0) {
2271                rdev->new_fw = false;
2272        } else if (new_fw < num_fw) {
2273                pr_err("ci_fw: mixing new and old firmware!\n");
2274                err = -EINVAL;
2275        } else {
2276                rdev->new_fw = true;
2277        }
2278
2279out:
2280        if (err) {
2281                if (err != -EINVAL)
2282                        pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2283                               fw_name);
2284                release_firmware(rdev->pfp_fw);
2285                rdev->pfp_fw = NULL;
2286                release_firmware(rdev->me_fw);
2287                rdev->me_fw = NULL;
2288                release_firmware(rdev->ce_fw);
2289                rdev->ce_fw = NULL;
2290                release_firmware(rdev->mec_fw);
2291                rdev->mec_fw = NULL;
2292                release_firmware(rdev->mec2_fw);
2293                rdev->mec2_fw = NULL;
2294                release_firmware(rdev->rlc_fw);
2295                rdev->rlc_fw = NULL;
2296                release_firmware(rdev->sdma_fw);
2297                rdev->sdma_fw = NULL;
2298                release_firmware(rdev->mc_fw);
2299                rdev->mc_fw = NULL;
2300                release_firmware(rdev->smc_fw);
2301                rdev->smc_fw = NULL;
2302        }
2303        return err;
2304}
2305
2306/*
2307 * Core functions
2308 */
2309/**
2310 * cik_tiling_mode_table_init - init the hw tiling table
2311 *
2312 * @rdev: radeon_device pointer
2313 *
2314 * Starting with SI, the tiling setup is done globally in a
2315 * set of 32 tiling modes.  Rather than selecting each set of
2316 * parameters per surface as on older asics, we just select
2317 * which index in the tiling table we want to use, and the
2318 * surface uses those parameters (CIK).
2319 */
2320static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2321{
2322        u32 *tile = rdev->config.cik.tile_mode_array;
2323        u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2324        const u32 num_tile_mode_states =
2325                        ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2326        const u32 num_secondary_tile_mode_states =
2327                        ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2328        u32 reg_offset, split_equal_to_row_size;
2329        u32 num_pipe_configs;
2330        u32 num_rbs = rdev->config.cik.max_backends_per_se *
2331                rdev->config.cik.max_shader_engines;
2332
2333        switch (rdev->config.cik.mem_row_size_in_kb) {
2334        case 1:
2335                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2336                break;
2337        case 2:
2338        default:
2339                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2340                break;
2341        case 4:
2342                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2343                break;
2344        }
2345
2346        num_pipe_configs = rdev->config.cik.max_tile_pipes;
2347        if (num_pipe_configs > 8)
2348                num_pipe_configs = 16;
2349
2350        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2351                tile[reg_offset] = 0;
2352        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2353                macrotile[reg_offset] = 0;
2354
2355        switch(num_pipe_configs) {
2356        case 16:
2357                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2361                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2365                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2373                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                           TILE_SPLIT(split_equal_to_row_size));
2377                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2381                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2384                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                           TILE_SPLIT(split_equal_to_row_size));
2388                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2389                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2390                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2393                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2400                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414                            PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2423                tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429                            PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2430                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435
2436                macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439                           NUM_BANKS(ADDR_SURF_16_BANK));
2440                macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443                           NUM_BANKS(ADDR_SURF_16_BANK));
2444                macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447                           NUM_BANKS(ADDR_SURF_16_BANK));
2448                macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451                           NUM_BANKS(ADDR_SURF_16_BANK));
2452                macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455                           NUM_BANKS(ADDR_SURF_8_BANK));
2456                macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                           NUM_BANKS(ADDR_SURF_4_BANK));
2460                macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                           NUM_BANKS(ADDR_SURF_2_BANK));
2464                macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467                           NUM_BANKS(ADDR_SURF_16_BANK));
2468                macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471                           NUM_BANKS(ADDR_SURF_16_BANK));
2472                macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                            NUM_BANKS(ADDR_SURF_16_BANK));
2476                macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479                            NUM_BANKS(ADDR_SURF_8_BANK));
2480                macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                            NUM_BANKS(ADDR_SURF_4_BANK));
2484                macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                            NUM_BANKS(ADDR_SURF_2_BANK));
2488                macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                            NUM_BANKS(ADDR_SURF_2_BANK));
2492
2493                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2494                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2495                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2496                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2497                break;
2498
2499        case 8:
2500                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2502                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2504                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2506                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2508                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2510                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2512                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2516                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                           TILE_SPLIT(split_equal_to_row_size));
2520                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2524                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2529                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                           TILE_SPLIT(split_equal_to_row_size));
2531                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2532                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2533                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2536                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2545                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2551                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2566                tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2568                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578
2579                macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582                                NUM_BANKS(ADDR_SURF_16_BANK));
2583                macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586                                NUM_BANKS(ADDR_SURF_16_BANK));
2587                macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590                                NUM_BANKS(ADDR_SURF_16_BANK));
2591                macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594                                NUM_BANKS(ADDR_SURF_16_BANK));
2595                macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598                                NUM_BANKS(ADDR_SURF_8_BANK));
2599                macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602                                NUM_BANKS(ADDR_SURF_4_BANK));
2603                macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2606                                NUM_BANKS(ADDR_SURF_2_BANK));
2607                macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2609                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610                                NUM_BANKS(ADDR_SURF_16_BANK));
2611                macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614                                NUM_BANKS(ADDR_SURF_16_BANK));
2615                macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618                                NUM_BANKS(ADDR_SURF_16_BANK));
2619                macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622                                NUM_BANKS(ADDR_SURF_16_BANK));
2623                macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626                                NUM_BANKS(ADDR_SURF_8_BANK));
2627                macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630                                NUM_BANKS(ADDR_SURF_4_BANK));
2631                macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634                                NUM_BANKS(ADDR_SURF_2_BANK));
2635
2636                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2637                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2638                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2639                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2640                break;
2641
2642        case 4:
2643                if (num_rbs == 4) {
2644                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2646                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2648                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2650                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2652                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2656                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2660                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                           TILE_SPLIT(split_equal_to_row_size));
2664                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2668                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2672                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674                           TILE_SPLIT(split_equal_to_row_size));
2675                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676                           PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2680                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2695                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2708                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2710                tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2711                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722
2723                } else if (num_rbs < 4) {
2724                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2726                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2728                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2730                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2732                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2734                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2736                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2740                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743                           TILE_SPLIT(split_equal_to_row_size));
2744                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2749                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2751                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2752                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2753                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754                           TILE_SPLIT(split_equal_to_row_size));
2755                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756                           PIPE_CONFIG(ADDR_SURF_P4_8x16));
2757                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2760                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2768                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2770                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2775                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2785                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2790                tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802                }
2803
2804                macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807                                NUM_BANKS(ADDR_SURF_16_BANK));
2808                macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                NUM_BANKS(ADDR_SURF_16_BANK));
2812                macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815                                NUM_BANKS(ADDR_SURF_16_BANK));
2816                macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2818                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2819                                NUM_BANKS(ADDR_SURF_16_BANK));
2820                macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823                                NUM_BANKS(ADDR_SURF_16_BANK));
2824                macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827                                NUM_BANKS(ADDR_SURF_8_BANK));
2828                macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2831                                NUM_BANKS(ADDR_SURF_4_BANK));
2832                macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2834                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                NUM_BANKS(ADDR_SURF_16_BANK));
2836                macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2837                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839                                NUM_BANKS(ADDR_SURF_16_BANK));
2840                macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843                                NUM_BANKS(ADDR_SURF_16_BANK));
2844                macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847                                NUM_BANKS(ADDR_SURF_16_BANK));
2848                macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                NUM_BANKS(ADDR_SURF_16_BANK));
2852                macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                NUM_BANKS(ADDR_SURF_8_BANK));
2856                macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859                                NUM_BANKS(ADDR_SURF_4_BANK));
2860
2861                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2862                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2863                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2864                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2865                break;
2866
2867        case 2:
2868                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2870                           PIPE_CONFIG(ADDR_SURF_P2) |
2871                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2872                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874                           PIPE_CONFIG(ADDR_SURF_P2) |
2875                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2876                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2878                           PIPE_CONFIG(ADDR_SURF_P2) |
2879                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2880                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882                           PIPE_CONFIG(ADDR_SURF_P2) |
2883                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2884                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886                           PIPE_CONFIG(ADDR_SURF_P2) |
2887                           TILE_SPLIT(split_equal_to_row_size));
2888                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889                           PIPE_CONFIG(ADDR_SURF_P2) |
2890                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893                           PIPE_CONFIG(ADDR_SURF_P2) |
2894                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2896                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897                           PIPE_CONFIG(ADDR_SURF_P2) |
2898                           TILE_SPLIT(split_equal_to_row_size));
2899                tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2900                           PIPE_CONFIG(ADDR_SURF_P2);
2901                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903                           PIPE_CONFIG(ADDR_SURF_P2));
2904                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914                            PIPE_CONFIG(ADDR_SURF_P2) |
2915                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                            PIPE_CONFIG(ADDR_SURF_P2) |
2918                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2919                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921                            PIPE_CONFIG(ADDR_SURF_P2) |
2922                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925                            PIPE_CONFIG(ADDR_SURF_P2) |
2926                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929                            PIPE_CONFIG(ADDR_SURF_P2) |
2930                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933                            PIPE_CONFIG(ADDR_SURF_P2));
2934                tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2936                            PIPE_CONFIG(ADDR_SURF_P2) |
2937                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940                            PIPE_CONFIG(ADDR_SURF_P2) |
2941                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2943                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944                            PIPE_CONFIG(ADDR_SURF_P2) |
2945                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946
2947                macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2948                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950                                NUM_BANKS(ADDR_SURF_16_BANK));
2951                macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954                                NUM_BANKS(ADDR_SURF_16_BANK));
2955                macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958                                NUM_BANKS(ADDR_SURF_16_BANK));
2959                macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962                                NUM_BANKS(ADDR_SURF_16_BANK));
2963                macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966                                NUM_BANKS(ADDR_SURF_16_BANK));
2967                macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                NUM_BANKS(ADDR_SURF_16_BANK));
2971                macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974                                NUM_BANKS(ADDR_SURF_8_BANK));
2975                macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2976                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                NUM_BANKS(ADDR_SURF_16_BANK));
2979                macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2980                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                NUM_BANKS(ADDR_SURF_16_BANK));
2983                macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                NUM_BANKS(ADDR_SURF_16_BANK));
2987                macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                NUM_BANKS(ADDR_SURF_16_BANK));
2991                macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                NUM_BANKS(ADDR_SURF_16_BANK));
2995                macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                NUM_BANKS(ADDR_SURF_16_BANK));
2999                macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002                                NUM_BANKS(ADDR_SURF_8_BANK));
3003
3004                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3005                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3006                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3007                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3008                break;
3009
3010        default:
3011                DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3012        }
3013}
3014
3015/**
3016 * cik_select_se_sh - select which SE, SH to address
3017 *
3018 * @rdev: radeon_device pointer
3019 * @se_num: shader engine to address
3020 * @sh_num: sh block to address
3021 *
3022 * Select which SE, SH combinations to address. Certain
3023 * registers are instanced per SE or SH.  0xffffffff means
3024 * broadcast to all SEs or SHs (CIK).
3025 */
3026static void cik_select_se_sh(struct radeon_device *rdev,
3027                             u32 se_num, u32 sh_num)
3028{
3029        u32 data = INSTANCE_BROADCAST_WRITES;
3030
3031        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3032                data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3033        else if (se_num == 0xffffffff)
3034                data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3035        else if (sh_num == 0xffffffff)
3036                data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3037        else
3038                data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3039        WREG32(GRBM_GFX_INDEX, data);
3040}
3041
/**
 * cik_create_bitmask - build a mask of contiguous low-order bits
 *
 * @bit_width: number of low-order bits to set
 *
 * Creates a mask whose lowest @bit_width bits are set (CIK).  A width of
 * 32 or more yields an all-ones mask.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* A 32-bit shift by 32 or more is undefined, so saturate up front. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3060
3061/**
3062 * cik_get_rb_disabled - computes the mask of disabled RBs
3063 *
3064 * @rdev: radeon_device pointer
3065 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3066 * @sh_per_se: number of SH blocks per SE for the asic
3067 *
3068 * Calculates the bitmask of disabled RBs (CIK).
3069 * Returns the disabled RB bitmask.
3070 */
3071static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3072                              u32 max_rb_num_per_se,
3073                              u32 sh_per_se)
3074{
3075        u32 data, mask;
3076
3077        data = RREG32(CC_RB_BACKEND_DISABLE);
3078        if (data & 1)
3079                data &= BACKEND_DISABLE_MASK;
3080        else
3081                data = 0;
3082        data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3083
3084        data >>= BACKEND_DISABLE_SHIFT;
3085
3086        mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3087
3088        return data & mask;
3089}
3090
3091/**
3092 * cik_setup_rb - setup the RBs on the asic
3093 *
3094 * @rdev: radeon_device pointer
3095 * @se_num: number of SEs (shader engines) for the asic
3096 * @sh_per_se: number of SH blocks per SE for the asic
3097 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3098 *
3099 * Configures per-SE/SH RB registers (CIK).
3100 */
3101static void cik_setup_rb(struct radeon_device *rdev,
3102                         u32 se_num, u32 sh_per_se,
3103                         u32 max_rb_num_per_se)
3104{
3105        int i, j;
3106        u32 data, mask;
3107        u32 disabled_rbs = 0;
3108        u32 enabled_rbs = 0;
3109
3110        for (i = 0; i < se_num; i++) {
3111                for (j = 0; j < sh_per_se; j++) {
3112                        cik_select_se_sh(rdev, i, j);
3113                        data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3114                        if (rdev->family == CHIP_HAWAII)
3115                                disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3116                        else
3117                                disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3118                }
3119        }
3120        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3121
3122        mask = 1;
3123        for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3124                if (!(disabled_rbs & mask))
3125                        enabled_rbs |= mask;
3126                mask <<= 1;
3127        }
3128
3129        rdev->config.cik.backend_enable_mask = enabled_rbs;
3130
3131        for (i = 0; i < se_num; i++) {
3132                cik_select_se_sh(rdev, i, 0xffffffff);
3133                data = 0;
3134                for (j = 0; j < sh_per_se; j++) {
3135                        switch (enabled_rbs & 3) {
3136                        case 0:
3137                                if (j == 0)
3138                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3139                                else
3140                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3141                                break;
3142                        case 1:
3143                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3144                                break;
3145                        case 2:
3146                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3147                                break;
3148                        case 3:
3149                        default:
3150                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3151                                break;
3152                        }
3153                        enabled_rbs >>= 2;
3154                }
3155                WREG32(PA_SC_RASTER_CONFIG, data);
3156        }
3157        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3158}
3159
3160/**
3161 * cik_gpu_init - setup the 3D engine
3162 *
3163 * @rdev: radeon_device pointer
3164 *
3165 * Configures the 3D engine and tiling configuration
3166 * registers so that the 3D engine is usable.
3167 */
3168static void cik_gpu_init(struct radeon_device *rdev)
3169{
3170        u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3171        u32 mc_arb_ramcfg;
3172        u32 hdp_host_path_cntl;
3173        u32 tmp;
3174        int i, j;
3175
3176        switch (rdev->family) {
3177        case CHIP_BONAIRE:
3178                rdev->config.cik.max_shader_engines = 2;
3179                rdev->config.cik.max_tile_pipes = 4;
3180                rdev->config.cik.max_cu_per_sh = 7;
3181                rdev->config.cik.max_sh_per_se = 1;
3182                rdev->config.cik.max_backends_per_se = 2;
3183                rdev->config.cik.max_texture_channel_caches = 4;
3184                rdev->config.cik.max_gprs = 256;
3185                rdev->config.cik.max_gs_threads = 32;
3186                rdev->config.cik.max_hw_contexts = 8;
3187
3188                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3189                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3190                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3191                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3192                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3193                break;
3194        case CHIP_HAWAII:
3195                rdev->config.cik.max_shader_engines = 4;
3196                rdev->config.cik.max_tile_pipes = 16;
3197                rdev->config.cik.max_cu_per_sh = 11;
3198                rdev->config.cik.max_sh_per_se = 1;
3199                rdev->config.cik.max_backends_per_se = 4;
3200                rdev->config.cik.max_texture_channel_caches = 16;
3201                rdev->config.cik.max_gprs = 256;
3202                rdev->config.cik.max_gs_threads = 32;
3203                rdev->config.cik.max_hw_contexts = 8;
3204
3205                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209                gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3210                break;
3211        case CHIP_KAVERI:
3212                rdev->config.cik.max_shader_engines = 1;
3213                rdev->config.cik.max_tile_pipes = 4;
3214                rdev->config.cik.max_cu_per_sh = 8;
3215                rdev->config.cik.max_backends_per_se = 2;
3216                rdev->config.cik.max_sh_per_se = 1;
3217                rdev->config.cik.max_texture_channel_caches = 4;
3218                rdev->config.cik.max_gprs = 256;
3219                rdev->config.cik.max_gs_threads = 16;
3220                rdev->config.cik.max_hw_contexts = 8;
3221
3222                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3227                break;
3228        case CHIP_KABINI:
3229        case CHIP_MULLINS:
3230        default:
3231                rdev->config.cik.max_shader_engines = 1;
3232                rdev->config.cik.max_tile_pipes = 2;
3233                rdev->config.cik.max_cu_per_sh = 2;
3234                rdev->config.cik.max_sh_per_se = 1;
3235                rdev->config.cik.max_backends_per_se = 1;
3236                rdev->config.cik.max_texture_channel_caches = 2;
3237                rdev->config.cik.max_gprs = 256;
3238                rdev->config.cik.max_gs_threads = 16;
3239                rdev->config.cik.max_hw_contexts = 8;
3240
3241                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3242                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3243                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3244                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3245                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3246                break;
3247        }
3248
3249        /* Initialize HDP */
3250        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3251                WREG32((0x2c14 + j), 0x00000000);
3252                WREG32((0x2c18 + j), 0x00000000);
3253                WREG32((0x2c1c + j), 0x00000000);
3254                WREG32((0x2c20 + j), 0x00000000);
3255                WREG32((0x2c24 + j), 0x00000000);
3256        }
3257
3258        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3259        WREG32(SRBM_INT_CNTL, 0x1);
3260        WREG32(SRBM_INT_ACK, 0x1);
3261
3262        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3263
3264        RREG32(MC_SHARED_CHMAP);
3265        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3266
3267        rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3268        rdev->config.cik.mem_max_burst_length_bytes = 256;
3269        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3270        rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3271        if (rdev->config.cik.mem_row_size_in_kb > 4)
3272                rdev->config.cik.mem_row_size_in_kb = 4;
3273        /* XXX use MC settings? */
3274        rdev->config.cik.shader_engine_tile_size = 32;
3275        rdev->config.cik.num_gpus = 1;
3276        rdev->config.cik.multi_gpu_tile_size = 64;
3277
3278        /* fix up row size */
3279        gb_addr_config &= ~ROW_SIZE_MASK;
3280        switch (rdev->config.cik.mem_row_size_in_kb) {
3281        case 1:
3282        default:
3283                gb_addr_config |= ROW_SIZE(0);
3284                break;
3285        case 2:
3286                gb_addr_config |= ROW_SIZE(1);
3287                break;
3288        case 4:
3289                gb_addr_config |= ROW_SIZE(2);
3290                break;
3291        }
3292
3293        /* setup tiling info dword.  gb_addr_config is not adequate since it does
3294         * not have bank info, so create a custom tiling dword.
3295         * bits 3:0   num_pipes
3296         * bits 7:4   num_banks
3297         * bits 11:8  group_size
3298         * bits 15:12 row_size
3299         */
3300        rdev->config.cik.tile_config = 0;
3301        switch (rdev->config.cik.num_tile_pipes) {
3302        case 1:
3303                rdev->config.cik.tile_config |= (0 << 0);
3304                break;
3305        case 2:
3306                rdev->config.cik.tile_config |= (1 << 0);
3307                break;
3308        case 4:
3309                rdev->config.cik.tile_config |= (2 << 0);
3310                break;
3311        case 8:
3312        default:
3313                /* XXX what about 12? */
3314                rdev->config.cik.tile_config |= (3 << 0);
3315                break;
3316        }
3317        rdev->config.cik.tile_config |=
3318                ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3319        rdev->config.cik.tile_config |=
3320                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3321        rdev->config.cik.tile_config |=
3322                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3323
3324        WREG32(GB_ADDR_CONFIG, gb_addr_config);
3325        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3326        WREG32(DMIF_ADDR_CALC, gb_addr_config);
3327        WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3328        WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3329        WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3330        WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3331        WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3332
3333        cik_tiling_mode_table_init(rdev);
3334
3335        cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3336                     rdev->config.cik.max_sh_per_se,
3337                     rdev->config.cik.max_backends_per_se);
3338
3339        rdev->config.cik.active_cus = 0;
3340        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3341                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3342                        rdev->config.cik.active_cus +=
3343                                hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3344                }
3345        }
3346
3347        /* set HW defaults for 3D engine */
3348        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3349
3350        WREG32(SX_DEBUG_1, 0x20);
3351
3352        WREG32(TA_CNTL_AUX, 0x00010000);
3353
3354        tmp = RREG32(SPI_CONFIG_CNTL);
3355        tmp |= 0x03000000;
3356        WREG32(SPI_CONFIG_CNTL, tmp);
3357
3358        WREG32(SQ_CONFIG, 1);
3359
3360        WREG32(DB_DEBUG, 0);
3361
3362        tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3363        tmp |= 0x00000400;
3364        WREG32(DB_DEBUG2, tmp);
3365
3366        tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3367        tmp |= 0x00020200;
3368        WREG32(DB_DEBUG3, tmp);
3369
3370        tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3371        tmp |= 0x00018208;
3372        WREG32(CB_HW_CONTROL, tmp);
3373
3374        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3375
3376        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3377                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3378                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3379                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3380
3381        WREG32(VGT_NUM_INSTANCES, 1);
3382
3383        WREG32(CP_PERFMON_CNTL, 0);
3384
3385        WREG32(SQ_CONFIG, 0);
3386
3387        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3388                                          FORCE_EOV_MAX_REZ_CNT(255)));
3389
3390        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3391               AUTO_INVLD_EN(ES_AND_GS_AUTO));
3392
3393        WREG32(VGT_GS_VERTEX_REUSE, 16);
3394        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3395
3396        tmp = RREG32(HDP_MISC_CNTL);
3397        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3398        WREG32(HDP_MISC_CNTL, tmp);
3399
3400        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3401        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3402
3403        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3404        WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3405
3406        udelay(50);
3407}
3408
3409/*
3410 * GPU scratch register helper functions.
3411 */
3412/**
3413 * cik_scratch_init - setup driver info for CP scratch regs
3414 *
3415 * @rdev: radeon_device pointer
3416 *
3417 * Set up the number and offset of the CP scratch registers.
3418 * NOTE: use of CP scratch registers is a legacy inferface and
3419 * is not used by default on newer asics (r6xx+).  On newer asics,
3420 * memory buffers are used for fences rather than scratch regs.
3421 */
3422static void cik_scratch_init(struct radeon_device *rdev)
3423{
3424        int i;
3425
3426        rdev->scratch.num_reg = 7;
3427        rdev->scratch.reg_base = SCRATCH_REG0;
3428        for (i = 0; i < rdev->scratch.num_reg; i++) {
3429                rdev->scratch.free[i] = true;
3430                rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3431        }
3432}
3433
3434/**
3435 * cik_ring_test - basic gfx ring test
3436 *
3437 * @rdev: radeon_device pointer
3438 * @ring: radeon_ring structure holding ring information
3439 *
3440 * Allocate a scratch register and write to it using the gfx ring (CIK).
3441 * Provides a basic gfx ring test to verify that the ring is working.
3442 * Used by cik_cp_gfx_resume();
3443 * Returns 0 on success, error on failure.
3444 */
3445int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3446{
3447        uint32_t scratch;
3448        uint32_t tmp = 0;
3449        unsigned i;
3450        int r;
3451
3452        r = radeon_scratch_get(rdev, &scratch);
3453        if (r) {
3454                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3455                return r;
3456        }
3457        WREG32(scratch, 0xCAFEDEAD);
3458        r = radeon_ring_lock(rdev, ring, 3);
3459        if (r) {
3460                DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3461                radeon_scratch_free(rdev, scratch);
3462                return r;
3463        }
3464        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3465        radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3466        radeon_ring_write(ring, 0xDEADBEEF);
3467        radeon_ring_unlock_commit(rdev, ring, false);
3468
3469        for (i = 0; i < rdev->usec_timeout; i++) {
3470                tmp = RREG32(scratch);
3471                if (tmp == 0xDEADBEEF)
3472                        break;
3473                udelay(1);
3474        }
3475        if (i < rdev->usec_timeout) {
3476                DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3477        } else {
3478                DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3479                          ring->idx, scratch, tmp);
3480                r = -EINVAL;
3481        }
3482        radeon_scratch_free(rdev, scratch);
3483        return r;
3484}
3485
3486/**
3487 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3488 *
3489 * @rdev: radeon_device pointer
3490 * @ridx: radeon ring index
3491 *
3492 * Emits an hdp flush on the cp.
3493 */
3494static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3495                                       int ridx)
3496{
3497        struct radeon_ring *ring = &rdev->ring[ridx];
3498        u32 ref_and_mask;
3499
3500        switch (ring->idx) {
3501        case CAYMAN_RING_TYPE_CP1_INDEX:
3502        case CAYMAN_RING_TYPE_CP2_INDEX:
3503        default:
3504                switch (ring->me) {
3505                case 0:
3506                        ref_and_mask = CP2 << ring->pipe;
3507                        break;
3508                case 1:
3509                        ref_and_mask = CP6 << ring->pipe;
3510                        break;
3511                default:
3512                        return;
3513                }
3514                break;
3515        case RADEON_RING_TYPE_GFX_INDEX:
3516                ref_and_mask = CP0;
3517                break;
3518        }
3519
3520        radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3521        radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3522                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3523                                 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3524        radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3525        radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3526        radeon_ring_write(ring, ref_and_mask);
3527        radeon_ring_write(ring, ref_and_mask);
3528        radeon_ring_write(ring, 0x20); /* poll interval */
3529}
3530
3531/**
3532 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3533 *
3534 * @rdev: radeon_device pointer
3535 * @fence: radeon fence object
3536 *
3537 * Emits a fence sequnce number on the gfx ring and flushes
3538 * GPU caches.
3539 */
3540void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3541                             struct radeon_fence *fence)
3542{
3543        struct radeon_ring *ring = &rdev->ring[fence->ring];
3544        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3545
3546        /* Workaround for cache flush problems. First send a dummy EOP
3547         * event down the pipe with seq one below.
3548         */
3549        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3550        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3551                                 EOP_TC_ACTION_EN |
3552                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3553                                 EVENT_INDEX(5)));
3554        radeon_ring_write(ring, addr & 0xfffffffc);
3555        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3556                                DATA_SEL(1) | INT_SEL(0));
3557        radeon_ring_write(ring, fence->seq - 1);
3558        radeon_ring_write(ring, 0);
3559
3560        /* Then send the real EOP event down the pipe. */
3561        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3562        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3563                                 EOP_TC_ACTION_EN |
3564                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3565                                 EVENT_INDEX(5)));
3566        radeon_ring_write(ring, addr & 0xfffffffc);
3567        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3568        radeon_ring_write(ring, fence->seq);
3569        radeon_ring_write(ring, 0);
3570}
3571
3572/**
3573 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3574 *
3575 * @rdev: radeon_device pointer
3576 * @fence: radeon fence object
3577 *
3578 * Emits a fence sequnce number on the compute ring and flushes
3579 * GPU caches.
3580 */
3581void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3582                                 struct radeon_fence *fence)
3583{
3584        struct radeon_ring *ring = &rdev->ring[fence->ring];
3585        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3586
3587        /* RELEASE_MEM - flush caches, send int */
3588        radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3589        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590                                 EOP_TC_ACTION_EN |
3591                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592                                 EVENT_INDEX(5)));
3593        radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3594        radeon_ring_write(ring, addr & 0xfffffffc);
3595        radeon_ring_write(ring, upper_32_bits(addr));
3596        radeon_ring_write(ring, fence->seq);
3597        radeon_ring_write(ring, 0);
3598}
3599
3600/**
3601 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3602 *
3603 * @rdev: radeon_device pointer
3604 * @ring: radeon ring buffer object
3605 * @semaphore: radeon semaphore object
3606 * @emit_wait: Is this a sempahore wait?
3607 *
3608 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3609 * from running ahead of semaphore waits.
3610 */
3611bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3612                             struct radeon_ring *ring,
3613                             struct radeon_semaphore *semaphore,
3614                             bool emit_wait)
3615{
3616        uint64_t addr = semaphore->gpu_addr;
3617        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3618
3619        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3620        radeon_ring_write(ring, lower_32_bits(addr));
3621        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3622
3623        if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3624                /* Prevent the PFP from running ahead of the semaphore wait */
3625                radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3626                radeon_ring_write(ring, 0x0);
3627        }
3628
3629        return true;
3630}
3631
3632/**
3633 * cik_copy_cpdma - copy pages using the CP DMA engine
3634 *
3635 * @rdev: radeon_device pointer
3636 * @src_offset: src GPU address
3637 * @dst_offset: dst GPU address
3638 * @num_gpu_pages: number of GPU pages to xfer
3639 * @resv: reservation object to sync to
3640 *
3641 * Copy GPU paging using the CP DMA engine (CIK+).
3642 * Used by the radeon ttm implementation to move pages if
3643 * registered as the asic copy callback.
3644 */
3645struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3646                                    uint64_t src_offset, uint64_t dst_offset,
3647                                    unsigned num_gpu_pages,
3648                                    struct dma_resv *resv)
3649{
3650        struct radeon_fence *fence;
3651        struct radeon_sync sync;
3652        int ring_index = rdev->asic->copy.blit_ring_index;
3653        struct radeon_ring *ring = &rdev->ring[ring_index];
3654        u32 size_in_bytes, cur_size_in_bytes, control;
3655        int i, num_loops;
3656        int r = 0;
3657
3658        radeon_sync_create(&sync);
3659
3660        size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3661        num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3662        r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3663        if (r) {
3664                DRM_ERROR("radeon: moving bo (%d).\n", r);
3665                radeon_sync_free(rdev, &sync, NULL);
3666                return ERR_PTR(r);
3667        }
3668
3669        radeon_sync_resv(rdev, &sync, resv, false);
3670        radeon_sync_rings(rdev, &sync, ring->idx);
3671
3672        for (i = 0; i < num_loops; i++) {
3673                cur_size_in_bytes = size_in_bytes;
3674                if (cur_size_in_bytes > 0x1fffff)
3675                        cur_size_in_bytes = 0x1fffff;
3676                size_in_bytes -= cur_size_in_bytes;
3677                control = 0;
3678                if (size_in_bytes == 0)
3679                        control |= PACKET3_DMA_DATA_CP_SYNC;
3680                radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3681                radeon_ring_write(ring, control);
3682                radeon_ring_write(ring, lower_32_bits(src_offset));
3683                radeon_ring_write(ring, upper_32_bits(src_offset));
3684                radeon_ring_write(ring, lower_32_bits(dst_offset));
3685                radeon_ring_write(ring, upper_32_bits(dst_offset));
3686                radeon_ring_write(ring, cur_size_in_bytes);
3687                src_offset += cur_size_in_bytes;
3688                dst_offset += cur_size_in_bytes;
3689        }
3690
3691        r = radeon_fence_emit(rdev, &fence, ring->idx);
3692        if (r) {
3693                radeon_ring_unlock_undo(rdev, ring);
3694                radeon_sync_free(rdev, &sync, NULL);
3695                return ERR_PTR(r);
3696        }
3697
3698        radeon_ring_unlock_commit(rdev, ring, false);
3699        radeon_sync_free(rdev, &sync, fence);
3700
3701        return fence;
3702}
3703
3704/*
3705 * IB stuff
3706 */
3707/**
3708 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709 *
3710 * @rdev: radeon_device pointer
3711 * @ib: radeon indirect buffer object
3712 *
3713 * Emits a DE (drawing engine) or CE (constant engine) IB
3714 * on the gfx ring.  IBs are usually generated by userspace
3715 * acceleration drivers and submitted to the kernel for
3716 * scheduling on the ring.  This function schedules the IB
3717 * on the gfx ring for execution by the GPU.
3718 */
3719void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720{
3721        struct radeon_ring *ring = &rdev->ring[ib->ring];
3722        unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3723        u32 header, control = INDIRECT_BUFFER_VALID;
3724
3725        if (ib->is_const_ib) {
3726                /* set switch buffer packet before const IB */
3727                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3728                radeon_ring_write(ring, 0);
3729
3730                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3731        } else {
3732                u32 next_rptr;
3733                if (ring->rptr_save_reg) {
3734                        next_rptr = ring->wptr + 3 + 4;
3735                        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3736                        radeon_ring_write(ring, ((ring->rptr_save_reg -
3737                                                  PACKET3_SET_UCONFIG_REG_START) >> 2));
3738                        radeon_ring_write(ring, next_rptr);
3739                } else if (rdev->wb.enabled) {
3740                        next_rptr = ring->wptr + 5 + 4;
3741                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3742                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3743                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3744                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3745                        radeon_ring_write(ring, next_rptr);
3746                }
3747
3748                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3749        }
3750
3751        control |= ib->length_dw | (vm_id << 24);
3752
3753        radeon_ring_write(ring, header);
3754        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3755        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3756        radeon_ring_write(ring, control);
3757}
3758
3759/**
3760 * cik_ib_test - basic gfx ring IB test
3761 *
3762 * @rdev: radeon_device pointer
3763 * @ring: radeon_ring structure holding ring information
3764 *
3765 * Allocate an IB and execute it on the gfx ring (CIK).
3766 * Provides a basic gfx ring test to verify that IBs are working.
3767 * Returns 0 on success, error on failure.
3768 */
3769int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3770{
3771        struct radeon_ib ib;
3772        uint32_t scratch;
3773        uint32_t tmp = 0;
3774        unsigned i;
3775        int r;
3776
3777        r = radeon_scratch_get(rdev, &scratch);
3778        if (r) {
3779                DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3780                return r;
3781        }
3782        WREG32(scratch, 0xCAFEDEAD);
3783        r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3784        if (r) {
3785                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3786                radeon_scratch_free(rdev, scratch);
3787                return r;
3788        }
3789        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3790        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3791        ib.ptr[2] = 0xDEADBEEF;
3792        ib.length_dw = 3;
3793        r = radeon_ib_schedule(rdev, &ib, NULL, false);
3794        if (r) {
3795                radeon_scratch_free(rdev, scratch);
3796                radeon_ib_free(rdev, &ib);
3797                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3798                return r;
3799        }
3800        r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3801                RADEON_USEC_IB_TEST_TIMEOUT));
3802        if (r < 0) {
3803                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3804                radeon_scratch_free(rdev, scratch);
3805                radeon_ib_free(rdev, &ib);
3806                return r;
3807        } else if (r == 0) {
3808                DRM_ERROR("radeon: fence wait timed out.\n");
3809                radeon_scratch_free(rdev, scratch);
3810                radeon_ib_free(rdev, &ib);
3811                return -ETIMEDOUT;
3812        }
3813        r = 0;
3814        for (i = 0; i < rdev->usec_timeout; i++) {
3815                tmp = RREG32(scratch);
3816                if (tmp == 0xDEADBEEF)
3817                        break;
3818                udelay(1);
3819        }
3820        if (i < rdev->usec_timeout) {
3821                DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3822        } else {
3823                DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3824                          scratch, tmp);
3825                r = -EINVAL;
3826        }
3827        radeon_scratch_free(rdev, scratch);
3828        radeon_ib_free(rdev, &ib);
3829        return r;
3830}
3831
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
3855/**
3856 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3857 *
3858 * @rdev: radeon_device pointer
3859 * @enable: enable or disable the MEs
3860 *
3861 * Halts or unhalts the gfx MEs.
3862 */
3863static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3864{
3865        if (enable)
3866                WREG32(CP_ME_CNTL, 0);
3867        else {
3868                if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3869                        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3870                WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3871                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3872        }
3873        udelay(50);
3874}
3875
3876/**
3877 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3878 *
3879 * @rdev: radeon_device pointer
3880 *
3881 * Loads the gfx PFP, ME, and CE ucode.
3882 * Returns 0 for success, -EINVAL if the ucode is not available.
3883 */
3884static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3885{
3886        int i;
3887
3888        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3889                return -EINVAL;
3890
3891        cik_cp_gfx_enable(rdev, false);
3892
3893        if (rdev->new_fw) {
3894                const struct gfx_firmware_header_v1_0 *pfp_hdr =
3895                        (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3896                const struct gfx_firmware_header_v1_0 *ce_hdr =
3897                        (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3898                const struct gfx_firmware_header_v1_0 *me_hdr =
3899                        (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3900                const __le32 *fw_data;
3901                u32 fw_size;
3902
3903                radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3904                radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3905                radeon_ucode_print_gfx_hdr(&me_hdr->header);
3906
3907                /* PFP */
3908                fw_data = (const __le32 *)
3909                        (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3910                fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3911                WREG32(CP_PFP_UCODE_ADDR, 0);
3912                for (i = 0; i < fw_size; i++)
3913                        WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3914                WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3915
3916                /* CE */
3917                fw_data = (const __le32 *)
3918                        (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3919                fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3920                WREG32(CP_CE_UCODE_ADDR, 0);
3921                for (i = 0; i < fw_size; i++)
3922                        WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3923                WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3924
3925                /* ME */
3926                fw_data = (const __be32 *)
3927                        (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3928                fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3929                WREG32(CP_ME_RAM_WADDR, 0);
3930                for (i = 0; i < fw_size; i++)
3931                        WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3932                WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3933                WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3934        } else {
3935                const __be32 *fw_data;
3936
3937                /* PFP */
3938                fw_data = (const __be32 *)rdev->pfp_fw->data;
3939                WREG32(CP_PFP_UCODE_ADDR, 0);
3940                for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3941                        WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3942                WREG32(CP_PFP_UCODE_ADDR, 0);
3943
3944                /* CE */
3945                fw_data = (const __be32 *)rdev->ce_fw->data;
3946                WREG32(CP_CE_UCODE_ADDR, 0);
3947                for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3948                        WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3949                WREG32(CP_CE_UCODE_ADDR, 0);
3950
3951                /* ME */
3952                fw_data = (const __be32 *)rdev->me_fw->data;
3953                WREG32(CP_ME_RAM_WADDR, 0);
3954                for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3955                        WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3956                WREG32(CP_ME_RAM_WADDR, 0);
3957        }
3958
3959        return 0;
3960}
3961
3962/**
3963 * cik_cp_gfx_start - start the gfx ring
3964 *
3965 * @rdev: radeon_device pointer
3966 *
3967 * Enables the ring and loads the clear state context and other
3968 * packets required to init the ring.
3969 * Returns 0 for success, error for failure.
3970 */
3971static int cik_cp_gfx_start(struct radeon_device *rdev)
3972{
3973        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3974        int r, i;
3975
3976        /* init the CP */
3977        WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3978        WREG32(CP_ENDIAN_SWAP, 0);
3979        WREG32(CP_DEVICE_ID, 1);
3980
3981        cik_cp_gfx_enable(rdev, true);
3982
3983        r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3984        if (r) {
3985                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3986                return r;
3987        }
3988
3989        /* init the CE partitions.  CE only used for gfx on CIK */
3990        radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3991        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3992        radeon_ring_write(ring, 0x8000);
3993        radeon_ring_write(ring, 0x8000);
3994
3995        /* setup clear context state */
3996        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998
3999        radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000        radeon_ring_write(ring, 0x80000000);
4001        radeon_ring_write(ring, 0x80000000);
4002
4003        for (i = 0; i < cik_default_size; i++)
4004                radeon_ring_write(ring, cik_default_state[i]);
4005
4006        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4008
4009        /* set clear context state */
4010        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4011        radeon_ring_write(ring, 0);
4012
4013        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4014        radeon_ring_write(ring, 0x00000316);
4015        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4016        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4017
4018        radeon_ring_unlock_commit(rdev, ring, false);
4019
4020        return 0;
4021}
4022
4023/**
4024 * cik_cp_gfx_fini - stop the gfx ring
4025 *
4026 * @rdev: radeon_device pointer
4027 *
4028 * Stop the gfx ring and tear down the driver ring
4029 * info.
4030 */
4031static void cik_cp_gfx_fini(struct radeon_device *rdev)
4032{
4033        cik_cp_gfx_enable(rdev, false);
4034        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4035}
4036
4037/**
4038 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Program the location and size of the gfx ring buffer
4043 * and test it to make sure it's working.
4044 * Returns 0 for success, error for failure.
4045 */
4046static int cik_cp_gfx_resume(struct radeon_device *rdev)
4047{
4048        struct radeon_ring *ring;
4049        u32 tmp;
4050        u32 rb_bufsz;
4051        u64 rb_addr;
4052        int r;
4053
4054        WREG32(CP_SEM_WAIT_TIMER, 0x0);
4055        if (rdev->family != CHIP_HAWAII)
4056                WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4057
4058        /* Set the write pointer delay */
4059        WREG32(CP_RB_WPTR_DELAY, 0);
4060
4061        /* set the RB to use vmid 0 */
4062        WREG32(CP_RB_VMID, 0);
4063
4064        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4065
4066        /* ring 0 - compute and gfx */
4067        /* Set ring buffer size */
4068        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4069        rb_bufsz = order_base_2(ring->ring_size / 8);
4070        tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4071#ifdef __BIG_ENDIAN
4072        tmp |= BUF_SWAP_32BIT;
4073#endif
4074        WREG32(CP_RB0_CNTL, tmp);
4075
4076        /* Initialize the ring buffer's read and write pointers */
4077        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4078        ring->wptr = 0;
4079        WREG32(CP_RB0_WPTR, ring->wptr);
4080
4081        /* set the wb address wether it's enabled or not */
4082        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4083        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4084
4085        /* scratch register shadowing is no longer supported */
4086        WREG32(SCRATCH_UMSK, 0);
4087
4088        if (!rdev->wb.enabled)
4089                tmp |= RB_NO_UPDATE;
4090
4091        mdelay(1);
4092        WREG32(CP_RB0_CNTL, tmp);
4093
4094        rb_addr = ring->gpu_addr >> 8;
4095        WREG32(CP_RB0_BASE, rb_addr);
4096        WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4097
4098        /* start the ring */
4099        cik_cp_gfx_start(rdev);
4100        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4101        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102        if (r) {
4103                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4104                return r;
4105        }
4106
4107        if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4108                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4109
4110        return 0;
4111}
4112
4113u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4114                     struct radeon_ring *ring)
4115{
4116        u32 rptr;
4117
4118        if (rdev->wb.enabled)
4119                rptr = rdev->wb.wb[ring->rptr_offs/4];
4120        else
4121                rptr = RREG32(CP_RB0_RPTR);
4122
4123        return rptr;
4124}
4125
4126u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4127                     struct radeon_ring *ring)
4128{
4129        return RREG32(CP_RB0_WPTR);
4130}
4131
4132void cik_gfx_set_wptr(struct radeon_device *rdev,
4133                      struct radeon_ring *ring)
4134{
4135        WREG32(CP_RB0_WPTR, ring->wptr);
4136        (void)RREG32(CP_RB0_WPTR);
4137}
4138
4139u32 cik_compute_get_rptr(struct radeon_device *rdev,
4140                         struct radeon_ring *ring)
4141{
4142        u32 rptr;
4143
4144        if (rdev->wb.enabled) {
4145                rptr = rdev->wb.wb[ring->rptr_offs/4];
4146        } else {
4147                mutex_lock(&rdev->srbm_mutex);
4148                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4149                rptr = RREG32(CP_HQD_PQ_RPTR);
4150                cik_srbm_select(rdev, 0, 0, 0, 0);
4151                mutex_unlock(&rdev->srbm_mutex);
4152        }
4153
4154        return rptr;
4155}
4156
4157u32 cik_compute_get_wptr(struct radeon_device *rdev,
4158                         struct radeon_ring *ring)
4159{
4160        u32 wptr;
4161
4162        if (rdev->wb.enabled) {
4163                /* XXX check if swapping is necessary on BE */
4164                wptr = rdev->wb.wb[ring->wptr_offs/4];
4165        } else {
4166                mutex_lock(&rdev->srbm_mutex);
4167                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4168                wptr = RREG32(CP_HQD_PQ_WPTR);
4169                cik_srbm_select(rdev, 0, 0, 0, 0);
4170                mutex_unlock(&rdev->srbm_mutex);
4171        }
4172
4173        return wptr;
4174}
4175
4176void cik_compute_set_wptr(struct radeon_device *rdev,
4177                          struct radeon_ring *ring)
4178{
4179        /* XXX check if swapping is necessary on BE */
4180        rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4181        WDOORBELL32(ring->doorbell_index, ring->wptr);
4182}
4183
4184static void cik_compute_stop(struct radeon_device *rdev,
4185                             struct radeon_ring *ring)
4186{
4187        u32 j, tmp;
4188
4189        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4190        /* Disable wptr polling. */
4191        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4192        tmp &= ~WPTR_POLL_EN;
4193        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4194        /* Disable HQD. */
4195        if (RREG32(CP_HQD_ACTIVE) & 1) {
4196                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4197                for (j = 0; j < rdev->usec_timeout; j++) {
4198                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
4199                                break;
4200                        udelay(1);
4201                }
4202                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4203                WREG32(CP_HQD_PQ_RPTR, 0);
4204                WREG32(CP_HQD_PQ_WPTR, 0);
4205        }
4206        cik_srbm_select(rdev, 0, 0, 0, 0);
4207}
4208
4209/**
4210 * cik_cp_compute_enable - enable/disable the compute CP MEs
4211 *
4212 * @rdev: radeon_device pointer
4213 * @enable: enable or disable the MEs
4214 *
4215 * Halts or unhalts the compute MEs.
4216 */
4217static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4218{
4219        if (enable)
4220                WREG32(CP_MEC_CNTL, 0);
4221        else {
4222                /*
4223                 * To make hibernation reliable we need to clear compute ring
4224                 * configuration before halting the compute ring.
4225                 */
4226                mutex_lock(&rdev->srbm_mutex);
4227                cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4228                cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4229                mutex_unlock(&rdev->srbm_mutex);
4230
4231                WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4232                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4233                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4234        }
4235        udelay(50);
4236}
4237
4238/**
4239 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4240 *
4241 * @rdev: radeon_device pointer
4242 *
4243 * Loads the compute MEC1&2 ucode.
4244 * Returns 0 for success, -EINVAL if the ucode is not available.
4245 */
4246static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4247{
4248        int i;
4249
4250        if (!rdev->mec_fw)
4251                return -EINVAL;
4252
4253        cik_cp_compute_enable(rdev, false);
4254
4255        if (rdev->new_fw) {
4256                const struct gfx_firmware_header_v1_0 *mec_hdr =
4257                        (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4258                const __le32 *fw_data;
4259                u32 fw_size;
4260
4261                radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4262
4263                /* MEC1 */
4264                fw_data = (const __le32 *)
4265                        (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4266                fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4267                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4268                for (i = 0; i < fw_size; i++)
4269                        WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4270                WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4271
4272                /* MEC2 */
4273                if (rdev->family == CHIP_KAVERI) {
4274                        const struct gfx_firmware_header_v1_0 *mec2_hdr =
4275                                (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4276
4277                        fw_data = (const __le32 *)
4278                                (rdev->mec2_fw->data +
4279                                 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4280                        fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4281                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4282                        for (i = 0; i < fw_size; i++)
4283                                WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4284                        WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4285                }
4286        } else {
4287                const __be32 *fw_data;
4288
4289                /* MEC1 */
4290                fw_data = (const __be32 *)rdev->mec_fw->data;
4291                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4292                for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4293                        WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4294                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4295
4296                if (rdev->family == CHIP_KAVERI) {
4297                        /* MEC2 */
4298                        fw_data = (const __be32 *)rdev->mec_fw->data;
4299                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4300                        for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4301                                WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4302                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4303                }
4304        }
4305
4306        return 0;
4307}
4308
4309/**
4310 * cik_cp_compute_start - start the compute queues
4311 *
4312 * @rdev: radeon_device pointer
4313 *
4314 * Enable the compute queues.
4315 * Returns 0 for success, error for failure.
4316 */
4317static int cik_cp_compute_start(struct radeon_device *rdev)
4318{
4319        cik_cp_compute_enable(rdev, true);
4320
4321        return 0;
4322}
4323
4324/**
4325 * cik_cp_compute_fini - stop the compute queues
4326 *
4327 * @rdev: radeon_device pointer
4328 *
4329 * Stop the compute queues and tear down the driver queue
4330 * info.
4331 */
4332static void cik_cp_compute_fini(struct radeon_device *rdev)
4333{
4334        int i, idx, r;
4335
4336        cik_cp_compute_enable(rdev, false);
4337
4338        for (i = 0; i < 2; i++) {
4339                if (i == 0)
4340                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
4341                else
4342                        idx = CAYMAN_RING_TYPE_CP2_INDEX;
4343
4344                if (rdev->ring[idx].mqd_obj) {
4345                        r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4346                        if (unlikely(r != 0))
4347                                dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4348
4349                        radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4350                        radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4351
4352                        radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4353                        rdev->ring[idx].mqd_obj = NULL;
4354                }
4355        }
4356}
4357
4358static void cik_mec_fini(struct radeon_device *rdev)
4359{
4360        int r;
4361
4362        if (rdev->mec.hpd_eop_obj) {
4363                r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4364                if (unlikely(r != 0))
4365                        dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4366                radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4367                radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4368
4369                radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4370                rdev->mec.hpd_eop_obj = NULL;
4371        }
4372}
4373
4374#define MEC_HPD_SIZE 2048
4375
4376static int cik_mec_init(struct radeon_device *rdev)
4377{
4378        int r;
4379        u32 *hpd;
4380
4381        /*
4382         * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4383         * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4384         */
4385        if (rdev->family == CHIP_KAVERI)
4386                rdev->mec.num_mec = 2;
4387        else
4388                rdev->mec.num_mec = 1;
4389        rdev->mec.num_pipe = 4;
4390        rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4391
4392        if (rdev->mec.hpd_eop_obj == NULL) {
4393                r = radeon_bo_create(rdev,
4394                                     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4395                                     PAGE_SIZE, true,
4396                                     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4397                                     &rdev->mec.hpd_eop_obj);
4398                if (r) {
4399                        dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4400                        return r;
4401                }
4402        }
4403
4404        r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4405        if (unlikely(r != 0)) {
4406                cik_mec_fini(rdev);
4407                return r;
4408        }
4409        r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4410                          &rdev->mec.hpd_eop_gpu_addr);
4411        if (r) {
4412                dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4413                cik_mec_fini(rdev);
4414                return r;
4415        }
4416        r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4417        if (r) {
4418                dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4419                cik_mec_fini(rdev);
4420                return r;
4421        }
4422
4423        /* clear memory.  Not sure if this is required or not */
4424        memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4425
4426        radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4427        radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4428
4429        return 0;
4430}
4431
4432struct hqd_registers
4433{
4434        u32 cp_mqd_base_addr;
4435        u32 cp_mqd_base_addr_hi;
4436        u32 cp_hqd_active;
4437        u32 cp_hqd_vmid;
4438        u32 cp_hqd_persistent_state;
4439        u32 cp_hqd_pipe_priority;
4440        u32 cp_hqd_queue_priority;
4441        u32 cp_hqd_quantum;
4442        u32 cp_hqd_pq_base;
4443        u32 cp_hqd_pq_base_hi;
4444        u32 cp_hqd_pq_rptr;
4445        u32 cp_hqd_pq_rptr_report_addr;
4446        u32 cp_hqd_pq_rptr_report_addr_hi;
4447        u32 cp_hqd_pq_wptr_poll_addr;
4448        u32 cp_hqd_pq_wptr_poll_addr_hi;
4449        u32 cp_hqd_pq_doorbell_control;
4450        u32 cp_hqd_pq_wptr;
4451        u32 cp_hqd_pq_control;
4452        u32 cp_hqd_ib_base_addr;
4453        u32 cp_hqd_ib_base_addr_hi;
4454        u32 cp_hqd_ib_rptr;
4455        u32 cp_hqd_ib_control;
4456        u32 cp_hqd_iq_timer;
4457        u32 cp_hqd_iq_rptr;
4458        u32 cp_hqd_dequeue_request;
4459        u32 cp_hqd_dma_offload;
4460        u32 cp_hqd_sema_cmd;
4461        u32 cp_hqd_msg_type;
4462        u32 cp_hqd_atomic0_preop_lo;
4463        u32 cp_hqd_atomic0_preop_hi;
4464        u32 cp_hqd_atomic1_preop_lo;
4465        u32 cp_hqd_atomic1_preop_hi;
4466        u32 cp_hqd_hq_scheduler0;
4467        u32 cp_hqd_hq_scheduler1;
4468        u32 cp_mqd_control;
4469};
4470
4471struct bonaire_mqd
4472{
4473        u32 header;
4474        u32 dispatch_initiator;
4475        u32 dimensions[3];
4476        u32 start_idx[3];
4477        u32 num_threads[3];
4478        u32 pipeline_stat_enable;
4479        u32 perf_counter_enable;
4480        u32 pgm[2];
4481        u32 tba[2];
4482        u32 tma[2];
4483        u32 pgm_rsrc[2];
4484        u32 vmid;
4485        u32 resource_limits;
4486        u32 static_thread_mgmt01[2];
4487        u32 tmp_ring_size;
4488        u32 static_thread_mgmt23[2];
4489        u32 restart[3];
4490        u32 thread_trace_enable;
4491        u32 reserved1;
4492        u32 user_data[16];
4493        u32 vgtcs_invoke_count[2];
4494        struct hqd_registers queue_state;
4495        u32 dequeue_cntr;
4496        u32 interrupt_queue[64];
4497};
4498
4499/**
4500 * cik_cp_compute_resume - setup the compute queue registers
4501 *
4502 * @rdev: radeon_device pointer
4503 *
4504 * Program the compute queues and test them to make sure they
4505 * are working.
4506 * Returns 0 for success, error for failure.
4507 */
4508static int cik_cp_compute_resume(struct radeon_device *rdev)
4509{
4510        int r, i, j, idx;
4511        u32 tmp;
4512        bool use_doorbell = true;
4513        u64 hqd_gpu_addr;
4514        u64 mqd_gpu_addr;
4515        u64 eop_gpu_addr;
4516        u64 wb_gpu_addr;
4517        u32 *buf;
4518        struct bonaire_mqd *mqd;
4519
4520        r = cik_cp_compute_start(rdev);
4521        if (r)
4522                return r;
4523
4524        /* fix up chicken bits */
4525        tmp = RREG32(CP_CPF_DEBUG);
4526        tmp |= (1 << 23);
4527        WREG32(CP_CPF_DEBUG, tmp);
4528
4529        /* init the pipes */
4530        mutex_lock(&rdev->srbm_mutex);
4531
4532        for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4533                int me = (i < 4) ? 1 : 2;
4534                int pipe = (i < 4) ? i : (i - 4);
4535
4536                cik_srbm_select(rdev, me, pipe, 0, 0);
4537
4538                eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4539                /* write the EOP addr */
4540                WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4541                WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4542
4543                /* set the VMID assigned */
4544                WREG32(CP_HPD_EOP_VMID, 0);
4545
4546                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4547                tmp = RREG32(CP_HPD_EOP_CONTROL);
4548                tmp &= ~EOP_SIZE_MASK;
4549                tmp |= order_base_2(MEC_HPD_SIZE / 8);
4550                WREG32(CP_HPD_EOP_CONTROL, tmp);
4551
4552        }
4553        cik_srbm_select(rdev, 0, 0, 0, 0);
4554        mutex_unlock(&rdev->srbm_mutex);
4555
4556        /* init the queues.  Just two for now. */
4557        for (i = 0; i < 2; i++) {
4558                if (i == 0)
4559                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
4560                else
4561                        idx = CAYMAN_RING_TYPE_CP2_INDEX;
4562
4563                if (rdev->ring[idx].mqd_obj == NULL) {
4564                        r = radeon_bo_create(rdev,
4565                                             sizeof(struct bonaire_mqd),
4566                                             PAGE_SIZE, true,
4567                                             RADEON_GEM_DOMAIN_GTT, 0, NULL,
4568                                             NULL, &rdev->ring[idx].mqd_obj);
4569                        if (r) {
4570                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4571                                return r;
4572                        }
4573                }
4574
4575                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4576                if (unlikely(r != 0)) {
4577                        cik_cp_compute_fini(rdev);
4578                        return r;
4579                }
4580                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4581                                  &mqd_gpu_addr);
4582                if (r) {
4583                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4584                        cik_cp_compute_fini(rdev);
4585                        return r;
4586                }
4587                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4588                if (r) {
4589                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4590                        cik_cp_compute_fini(rdev);
4591                        return r;
4592                }
4593
4594                /* init the mqd struct */
4595                memset(buf, 0, sizeof(struct bonaire_mqd));
4596
4597                mqd = (struct bonaire_mqd *)buf;
4598                mqd->header = 0xC0310800;
4599                mqd->static_thread_mgmt01[0] = 0xffffffff;
4600                mqd->static_thread_mgmt01[1] = 0xffffffff;
4601                mqd->static_thread_mgmt23[0] = 0xffffffff;
4602                mqd->static_thread_mgmt23[1] = 0xffffffff;
4603
4604                mutex_lock(&rdev->srbm_mutex);
4605                cik_srbm_select(rdev, rdev->ring[idx].me,
4606                                rdev->ring[idx].pipe,
4607                                rdev->ring[idx].queue, 0);
4608
4609                /* disable wptr polling */
4610                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4611                tmp &= ~WPTR_POLL_EN;
4612                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4613
4614                /* enable doorbell? */
4615                mqd->queue_state.cp_hqd_pq_doorbell_control =
4616                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4617                if (use_doorbell)
4618                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4619                else
4620                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4621                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4622                       mqd->queue_state.cp_hqd_pq_doorbell_control);
4623
4624                /* disable the queue if it's active */
4625                mqd->queue_state.cp_hqd_dequeue_request = 0;
4626                mqd->queue_state.cp_hqd_pq_rptr = 0;
4627                mqd->queue_state.cp_hqd_pq_wptr= 0;
4628                if (RREG32(CP_HQD_ACTIVE) & 1) {
4629                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4630                        for (j = 0; j < rdev->usec_timeout; j++) {
4631                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
4632                                        break;
4633                                udelay(1);
4634                        }
4635                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4636                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4637                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4638                }
4639
4640                /* set the pointer to the MQD */
4641                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4642                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4643                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4644                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4645                /* set MQD vmid to 0 */
4646                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4647                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4648                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4649
4650                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4651                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4652                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4653                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4654                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4655                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4656
4657                /* set up the HQD, this is similar to CP_RB0_CNTL */
4658                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4659                mqd->queue_state.cp_hqd_pq_control &=
4660                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4661
4662                mqd->queue_state.cp_hqd_pq_control |=
4663                        order_base_2(rdev->ring[idx].ring_size / 8);
4664                mqd->queue_state.cp_hqd_pq_control |=
4665                        (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4666#ifdef __BIG_ENDIAN
4667                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4668#endif
4669                mqd->queue_state.cp_hqd_pq_control &=
4670                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4671                mqd->queue_state.cp_hqd_pq_control |=
4672                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4673                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4674
4675                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4676                if (i == 0)
4677                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4678                else
4679                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4680                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4681                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4682                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4683                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4684                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4685
4686                /* set the wb address wether it's enabled or not */
4687                if (i == 0)
4688                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4689                else
4690                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4691                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4692                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4693                        upper_32_bits(wb_gpu_addr) & 0xffff;
4694                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4695                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4696                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4697                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4698
4699                /* enable the doorbell if requested */
4700                if (use_doorbell) {
4701                        mqd->queue_state.cp_hqd_pq_doorbell_control =
4702                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4703                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4704                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
4705                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4706                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4707                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
4708                                ~(DOORBELL_SOURCE | DOORBELL_HIT);
4709
4710                } else {
4711                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4712                }
4713                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4714                       mqd->queue_state.cp_hqd_pq_doorbell_control);
4715
4716                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4717                rdev->ring[idx].wptr = 0;
4718                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4719                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4720                mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4721
4722                /* set the vmid for the queue */
4723                mqd->queue_state.cp_hqd_vmid = 0;
4724                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4725
4726                /* activate the queue */
4727                mqd->queue_state.cp_hqd_active = 1;
4728                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4729
4730                cik_srbm_select(rdev, 0, 0, 0, 0);
4731                mutex_unlock(&rdev->srbm_mutex);
4732
4733                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4734                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4735
4736                rdev->ring[idx].ready = true;
4737                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4738                if (r)
4739                        rdev->ring[idx].ready = false;
4740        }
4741
4742        return 0;
4743}
4744
4745static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4746{
4747        cik_cp_gfx_enable(rdev, enable);
4748        cik_cp_compute_enable(rdev, enable);
4749}
4750
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx microcode first; the compute microcode is only loaded
 * when that succeeded.
 * Returns 0 for success, or the error of the first step that failed.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret == 0)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
4764
/**
 * cik_cp_fini - tear down the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Runs the gfx teardown first, then the compute teardown.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4770
4771static int cik_cp_resume(struct radeon_device *rdev)
4772{
4773        int r;
4774
4775        cik_enable_gui_idle_interrupt(rdev, false);
4776
4777        r = cik_cp_load_microcode(rdev);
4778        if (r)
4779                return r;
4780
4781        r = cik_cp_gfx_resume(rdev);
4782        if (r)
4783                return r;
4784        r = cik_cp_compute_resume(rdev);
4785        if (r)
4786                return r;
4787
4788        cik_enable_gui_idle_interrupt(rdev, true);
4789
4790        return 0;
4791}
4792
4793static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4794{
4795        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4796                RREG32(GRBM_STATUS));
4797        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4798                RREG32(GRBM_STATUS2));
4799        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4800                RREG32(GRBM_STATUS_SE0));
4801        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4802                RREG32(GRBM_STATUS_SE1));
4803        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4804                RREG32(GRBM_STATUS_SE2));
4805        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4806                RREG32(GRBM_STATUS_SE3));
4807        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4808                RREG32(SRBM_STATUS));
4809        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4810                RREG32(SRBM_STATUS2));
4811        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4812                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4813        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4814                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4815        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4816        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4817                 RREG32(CP_STALLED_STAT1));
4818        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4819                 RREG32(CP_STALLED_STAT2));
4820        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4821                 RREG32(CP_STALLED_STAT3));
4822        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4823                 RREG32(CP_CPF_BUSY_STAT));
4824        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4825                 RREG32(CP_CPF_STALLED_STAT1));
4826        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4827        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4828        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4829                 RREG32(CP_CPC_STALLED_STAT1));
4830        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4831}
4832
4833/**
4834 * cik_gpu_check_soft_reset - check which blocks are busy
4835 *
4836 * @rdev: radeon_device pointer
4837 *
4838 * Check which blocks are busy and return the relevant reset
4839 * mask to be used by cik_gpu_soft_reset().
4840 * Returns a mask of the blocks to be reset.
4841 */
4842u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4843{
4844        u32 reset_mask = 0;
4845        u32 tmp;
4846
4847        /* GRBM_STATUS */
4848        tmp = RREG32(GRBM_STATUS);
4849        if (tmp & (PA_BUSY | SC_BUSY |
4850                   BCI_BUSY | SX_BUSY |
4851                   TA_BUSY | VGT_BUSY |
4852                   DB_BUSY | CB_BUSY |
4853                   GDS_BUSY | SPI_BUSY |
4854                   IA_BUSY | IA_BUSY_NO_DMA))
4855                reset_mask |= RADEON_RESET_GFX;
4856
4857        if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4858                reset_mask |= RADEON_RESET_CP;
4859
4860        /* GRBM_STATUS2 */
4861        tmp = RREG32(GRBM_STATUS2);
4862        if (tmp & RLC_BUSY)
4863                reset_mask |= RADEON_RESET_RLC;
4864
4865        /* SDMA0_STATUS_REG */
4866        tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4867        if (!(tmp & SDMA_IDLE))
4868                reset_mask |= RADEON_RESET_DMA;
4869
4870        /* SDMA1_STATUS_REG */
4871        tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4872        if (!(tmp & SDMA_IDLE))
4873                reset_mask |= RADEON_RESET_DMA1;
4874
4875        /* SRBM_STATUS2 */
4876        tmp = RREG32(SRBM_STATUS2);
4877        if (tmp & SDMA_BUSY)
4878                reset_mask |= RADEON_RESET_DMA;
4879
4880        if (tmp & SDMA1_BUSY)
4881                reset_mask |= RADEON_RESET_DMA1;
4882
4883        /* SRBM_STATUS */
4884        tmp = RREG32(SRBM_STATUS);
4885
4886        if (tmp & IH_BUSY)
4887                reset_mask |= RADEON_RESET_IH;
4888
4889        if (tmp & SEM_BUSY)
4890                reset_mask |= RADEON_RESET_SEM;
4891
4892        if (tmp & GRBM_RQ_PENDING)
4893                reset_mask |= RADEON_RESET_GRBM;
4894
4895        if (tmp & VMC_BUSY)
4896                reset_mask |= RADEON_RESET_VMC;
4897
4898        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4899                   MCC_BUSY | MCD_BUSY))
4900                reset_mask |= RADEON_RESET_MC;
4901
4902        if (evergreen_is_display_hung(rdev))
4903                reset_mask |= RADEON_RESET_DISPLAY;
4904
4905        /* Skip MC reset as it's mostly likely not hung, just busy */
4906        if (reset_mask & RADEON_RESET_MC) {
4907                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4908                reset_mask &= ~RADEON_RESET_MC;
4909        }
4910
4911        return reset_mask;
4912}
4913
4914/**
4915 * cik_gpu_soft_reset - soft reset GPU
4916 *
4917 * @rdev: radeon_device pointer
4918 * @reset_mask: mask of which blocks to reset
4919 *
4920 * Soft reset the blocks specified in @reset_mask.
4921 */
4922static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4923{
4924        struct evergreen_mc_save save;
4925        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4926        u32 tmp;
4927
4928        if (reset_mask == 0)
4929                return;
4930
4931        dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4932
4933        cik_print_gpu_status_regs(rdev);
4934        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4935                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4936        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4937                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4938
4939        /* disable CG/PG */
4940        cik_fini_pg(rdev);
4941        cik_fini_cg(rdev);
4942
4943        /* stop the rlc */
4944        cik_rlc_stop(rdev);
4945
4946        /* Disable GFX parsing/prefetching */
4947        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4948
4949        /* Disable MEC parsing/prefetching */
4950        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4951
4952        if (reset_mask & RADEON_RESET_DMA) {
4953                /* sdma0 */
4954                tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4955                tmp |= SDMA_HALT;
4956                WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4957        }
4958        if (reset_mask & RADEON_RESET_DMA1) {
4959                /* sdma1 */
4960                tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4961                tmp |= SDMA_HALT;
4962                WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4963        }
4964
4965        evergreen_mc_stop(rdev, &save);
4966        if (evergreen_mc_wait_for_idle(rdev)) {
4967                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4968        }
4969
4970        if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4971                grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4972
4973        if (reset_mask & RADEON_RESET_CP) {
4974                grbm_soft_reset |= SOFT_RESET_CP;
4975
4976                srbm_soft_reset |= SOFT_RESET_GRBM;
4977        }
4978
4979        if (reset_mask & RADEON_RESET_DMA)
4980                srbm_soft_reset |= SOFT_RESET_SDMA;
4981
4982        if (reset_mask & RADEON_RESET_DMA1)
4983                srbm_soft_reset |= SOFT_RESET_SDMA1;
4984
4985        if (reset_mask & RADEON_RESET_DISPLAY)
4986                srbm_soft_reset |= SOFT_RESET_DC;
4987
4988        if (reset_mask & RADEON_RESET_RLC)
4989                grbm_soft_reset |= SOFT_RESET_RLC;
4990
4991        if (reset_mask & RADEON_RESET_SEM)
4992                srbm_soft_reset |= SOFT_RESET_SEM;
4993
4994        if (reset_mask & RADEON_RESET_IH)
4995                srbm_soft_reset |= SOFT_RESET_IH;
4996
4997        if (reset_mask & RADEON_RESET_GRBM)
4998                srbm_soft_reset |= SOFT_RESET_GRBM;
4999
5000        if (reset_mask & RADEON_RESET_VMC)
5001                srbm_soft_reset |= SOFT_RESET_VMC;
5002
5003        if (!(rdev->flags & RADEON_IS_IGP)) {
5004                if (reset_mask & RADEON_RESET_MC)
5005                        srbm_soft_reset |= SOFT_RESET_MC;
5006        }
5007
5008        if (grbm_soft_reset) {
5009                tmp = RREG32(GRBM_SOFT_RESET);
5010                tmp |= grbm_soft_reset;
5011                dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5012                WREG32(GRBM_SOFT_RESET, tmp);
5013                tmp = RREG32(GRBM_SOFT_RESET);
5014
5015                udelay(50);
5016
5017                tmp &= ~grbm_soft_reset;
5018                WREG32(GRBM_SOFT_RESET, tmp);
5019                tmp = RREG32(GRBM_SOFT_RESET);
5020        }
5021
5022        if (srbm_soft_reset) {
5023                tmp = RREG32(SRBM_SOFT_RESET);
5024                tmp |= srbm_soft_reset;
5025                dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5026                WREG32(SRBM_SOFT_RESET, tmp);
5027                tmp = RREG32(SRBM_SOFT_RESET);
5028
5029                udelay(50);
5030
5031                tmp &= ~srbm_soft_reset;
5032                WREG32(SRBM_SOFT_RESET, tmp);
5033                tmp = RREG32(SRBM_SOFT_RESET);
5034        }
5035
5036        /* Wait a little for things to settle down */
5037        udelay(50);
5038
5039        evergreen_mc_resume(rdev, &save);
5040        udelay(50);
5041
5042        cik_print_gpu_status_regs(rdev);
5043}
5044
5045struct kv_reset_save_regs {
5046        u32 gmcon_reng_execute;
5047        u32 gmcon_misc;
5048        u32 gmcon_misc3;
5049};
5050
5051static void kv_save_regs_for_reset(struct radeon_device *rdev,
5052                                   struct kv_reset_save_regs *save)
5053{
5054        save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5055        save->gmcon_misc = RREG32(GMCON_MISC);
5056        save->gmcon_misc3 = RREG32(GMCON_MISC3);
5057
5058        WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5059        WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5060                                                STCTRL_STUTTER_EN));
5061}
5062
5063static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5064                                      struct kv_reset_save_regs *save)
5065{
5066        int i;
5067
5068        WREG32(GMCON_PGFSM_WRITE, 0);
5069        WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5070
5071        for (i = 0; i < 5; i++)
5072                WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074        WREG32(GMCON_PGFSM_WRITE, 0);
5075        WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5076
5077        for (i = 0; i < 5; i++)
5078                WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080        WREG32(GMCON_PGFSM_WRITE, 0x210000);
5081        WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5082
5083        for (i = 0; i < 5; i++)
5084                WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086        WREG32(GMCON_PGFSM_WRITE, 0x21003);
5087        WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5088
5089        for (i = 0; i < 5; i++)
5090                WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092        WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5093        WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5094
5095        for (i = 0; i < 5; i++)
5096                WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098        WREG32(GMCON_PGFSM_WRITE, 0);
5099        WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5100
5101        for (i = 0; i < 5; i++)
5102                WREG32(GMCON_PGFSM_WRITE, 0);
5103
5104        WREG32(GMCON_PGFSM_WRITE, 0x420000);
5105        WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5106
5107        for (i = 0; i < 5; i++)
5108                WREG32(GMCON_PGFSM_WRITE, 0);
5109
5110        WREG32(GMCON_PGFSM_WRITE, 0x120202);
5111        WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5112
5113        for (i = 0; i < 5; i++)
5114                WREG32(GMCON_PGFSM_WRITE, 0);
5115
5116        WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5117        WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5118
5119        for (i = 0; i < 5; i++)
5120                WREG32(GMCON_PGFSM_WRITE, 0);
5121
5122        WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5123        WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5124
5125        for (i = 0; i < 5; i++)
5126                WREG32(GMCON_PGFSM_WRITE, 0);
5127
5128        WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5129        WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5130
5131        WREG32(GMCON_MISC3, save->gmcon_misc3);
5132        WREG32(GMCON_MISC, save->gmcon_misc);
5133        WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5134}
5135
5136static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5137{
5138        struct evergreen_mc_save save;
5139        struct kv_reset_save_regs kv_save = { 0 };
5140        u32 tmp, i;
5141
5142        dev_info(rdev->dev, "GPU pci config reset\n");
5143
5144        /* disable dpm? */
5145
5146        /* disable cg/pg */
5147        cik_fini_pg(rdev);
5148        cik_fini_cg(rdev);
5149
5150        /* Disable GFX parsing/prefetching */
5151        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5152
5153        /* Disable MEC parsing/prefetching */
5154        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5155
5156        /* sdma0 */
5157        tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5158        tmp |= SDMA_HALT;
5159        WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5160        /* sdma1 */
5161        tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5162        tmp |= SDMA_HALT;
5163        WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5164        /* XXX other engines? */
5165
5166        /* halt the rlc, disable cp internal ints */
5167        cik_rlc_stop(rdev);
5168
5169        udelay(50);
5170
5171        /* disable mem access */
5172        evergreen_mc_stop(rdev, &save);
5173        if (evergreen_mc_wait_for_idle(rdev)) {
5174                dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5175        }
5176
5177        if (rdev->flags & RADEON_IS_IGP)
5178                kv_save_regs_for_reset(rdev, &kv_save);
5179
5180        /* disable BM */
5181        pci_clear_master(rdev->pdev);
5182        /* reset */
5183        radeon_pci_config_reset(rdev);
5184
5185        udelay(100);
5186
5187        /* wait for asic to come out of reset */
5188        for (i = 0; i < rdev->usec_timeout; i++) {
5189                if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5190                        break;
5191                udelay(1);
5192        }
5193
5194        /* does asic init need to be run first??? */
5195        if (rdev->flags & RADEON_IS_IGP)
5196                kv_restore_regs_for_reset(rdev, &kv_save);
5197}
5198
5199/**
5200 * cik_asic_reset - soft reset GPU
5201 *
5202 * @rdev: radeon_device pointer
5203 * @hard: force hard reset
5204 *
5205 * Look up which blocks are hung and attempt
5206 * to reset them.
5207 * Returns 0 for success.
5208 */
5209int cik_asic_reset(struct radeon_device *rdev, bool hard)
5210{
5211        u32 reset_mask;
5212
5213        if (hard) {
5214                cik_gpu_pci_config_reset(rdev);
5215                return 0;
5216        }
5217
5218        reset_mask = cik_gpu_check_soft_reset(rdev);
5219
5220        if (reset_mask)
5221                r600_set_bios_scratch_engine_hung(rdev, true);
5222
5223        /* try soft reset */
5224        cik_gpu_soft_reset(rdev, reset_mask);
5225
5226        reset_mask = cik_gpu_check_soft_reset(rdev);
5227
5228        /* try pci config reset */
5229        if (reset_mask && radeon_hard_reset)
5230                cik_gpu_pci_config_reset(rdev);
5231
5232        reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234        if (!reset_mask)
5235                r600_set_bios_scratch_engine_hung(rdev, false);
5236
5237        return 0;
5238}
5239
5240/**
5241 * cik_gfx_is_lockup - check if the 3D engine is locked up
5242 *
5243 * @rdev: radeon_device pointer
5244 * @ring: radeon_ring structure holding ring information
5245 *
5246 * Check if the 3D engine is locked up (CIK).
5247 * Returns true if the engine is locked, false if not.
5248 */
5249bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5250{
5251        u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5252
5253        if (!(reset_mask & (RADEON_RESET_GFX |
5254                            RADEON_RESET_COMPUTE |
5255                            RADEON_RESET_CP))) {
5256                radeon_ring_lockup_update(rdev, ring);
5257                return false;
5258        }
5259        return radeon_ring_test_lockup(rdev, ring);
5260}
5261
5262/* MC */
5263/**
5264 * cik_mc_program - program the GPU memory controller
5265 *
5266 * @rdev: radeon_device pointer
5267 *
5268 * Set the location of vram, gart, and AGP in the GPU's
5269 * physical address space (CIK).
5270 */
5271static void cik_mc_program(struct radeon_device *rdev)
5272{
5273        struct evergreen_mc_save save;
5274        u32 tmp;
5275        int i, j;
5276
5277        /* Initialize HDP */
5278        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5279                WREG32((0x2c14 + j), 0x00000000);
5280                WREG32((0x2c18 + j), 0x00000000);
5281                WREG32((0x2c1c + j), 0x00000000);
5282                WREG32((0x2c20 + j), 0x00000000);
5283                WREG32((0x2c24 + j), 0x00000000);
5284        }
5285        WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5286
5287        evergreen_mc_stop(rdev, &save);
5288        if (radeon_mc_wait_for_idle(rdev)) {
5289                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5290        }
5291        /* Lockout access through VGA aperture*/
5292        WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5293        /* Update configuration */
5294        WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5295               rdev->mc.vram_start >> 12);
5296        WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5297               rdev->mc.vram_end >> 12);
5298        WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5299               rdev->vram_scratch.gpu_addr >> 12);
5300        tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5301        tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5302        WREG32(MC_VM_FB_LOCATION, tmp);
5303        /* XXX double check these! */
5304        WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5305        WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5306        WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5307        WREG32(MC_VM_AGP_BASE, 0);
5308        WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5309        WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5310        if (radeon_mc_wait_for_idle(rdev)) {
5311                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5312        }
5313        evergreen_mc_resume(rdev, &save);
5314        /* we need to own VRAM, so turn off the VGA renderer here
5315         * to stop it overwriting our objects */
5316        rv515_vga_render_disable(rdev);
5317}
5318
5319/**
5320 * cik_mc_init - initialize the memory controller driver params
5321 *
5322 * @rdev: radeon_device pointer
5323 *
5324 * Look up the amount of vram, vram width, and decide how to place
5325 * vram and gart within the GPU's physical address space (CIK).
5326 * Returns 0 for success.
5327 */
5328static int cik_mc_init(struct radeon_device *rdev)
5329{
5330        u32 tmp;
5331        int chansize, numchan;
5332
5333        /* Get VRAM informations */
5334        rdev->mc.vram_is_ddr = true;
5335        tmp = RREG32(MC_ARB_RAMCFG);
5336        if (tmp & CHANSIZE_MASK) {
5337                chansize = 64;
5338        } else {
5339                chansize = 32;
5340        }
5341        tmp = RREG32(MC_SHARED_CHMAP);
5342        switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5343        case 0:
5344        default:
5345                numchan = 1;
5346                break;
5347        case 1:
5348                numchan = 2;
5349                break;
5350        case 2:
5351                numchan = 4;
5352                break;
5353        case 3:
5354                numchan = 8;
5355                break;
5356        case 4:
5357                numchan = 3;
5358                break;
5359        case 5:
5360                numchan = 6;
5361                break;
5362        case 6:
5363                numchan = 10;
5364                break;
5365        case 7:
5366                numchan = 12;
5367                break;
5368        case 8:
5369                numchan = 16;
5370                break;
5371        }
5372        rdev->mc.vram_width = numchan * chansize;
5373        /* Could aper size report 0 ? */
5374        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5375        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5376        /* size in MB on si */
5377        rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5378        rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5379        rdev->mc.visible_vram_size = rdev->mc.aper_size;
5380        si_vram_gtt_location(rdev, &rdev->mc);
5381        radeon_update_bandwidth_info(rdev);
5382
5383        return 0;
5384}
5385
5386/*
5387 * GART
5388 * VMID 0 is the physical GPU addresses as used by the kernel.
5389 * VMIDs 1-15 are used for userspace clients and are handled
5390 * by the radeon vm/hsa code.
5391 */
5392/**
5393 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5394 *
5395 * @rdev: radeon_device pointer
5396 *
5397 * Flush the TLB for the VMID 0 page table (CIK).
5398 */
5399void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5400{
5401        /* flush hdp cache */
5402        WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5403
5404        /* bits 0-15 are the VM contexts0-15 */
5405        WREG32(VM_INVALIDATE_REQUEST, 0x1);
5406}
5407
5408/**
5409 * cik_pcie_gart_enable - gart enable
5410 *
5411 * @rdev: radeon_device pointer
5412 *
5413 * This sets up the TLBs, programs the page tables for VMID0,
5414 * sets up the hw for VMIDs 1-15 which are allocated on
5415 * demand, and sets up the global locations for the LDS, GDS,
5416 * and GPUVM for FSA64 clients (CIK).
5417 * Returns 0 for success, errors for failure.
5418 */
5419static int cik_pcie_gart_enable(struct radeon_device *rdev)
5420{
5421        int r, i;
5422
5423        if (rdev->gart.robj == NULL) {
5424                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5425                return -EINVAL;
5426        }
5427        r = radeon_gart_table_vram_pin(rdev);
5428        if (r)
5429                return r;
5430        /* Setup TLB control */
5431        WREG32(MC_VM_MX_L1_TLB_CNTL,
5432               (0xA << 7) |
5433               ENABLE_L1_TLB |
5434               ENABLE_L1_FRAGMENT_PROCESSING |
5435               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5436               ENABLE_ADVANCED_DRIVER_MODEL |
5437               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5438        /* Setup L2 cache */
5439        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5440               ENABLE_L2_FRAGMENT_PROCESSING |
5441               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5442               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5443               EFFECTIVE_L2_QUEUE_SIZE(7) |
5444               CONTEXT1_IDENTITY_ACCESS_MODE(1));
5445        WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5446        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5447               BANK_SELECT(4) |
5448               L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5449        /* setup context0 */
5450        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5451        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5452        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5453        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5454                        (u32)(rdev->dummy_page.addr >> 12));
5455        WREG32(VM_CONTEXT0_CNTL2, 0);
5456        WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5457                                  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5458
5459        WREG32(0x15D4, 0);
5460        WREG32(0x15D8, 0);
5461        WREG32(0x15DC, 0);
5462
5463        /* restore context1-15 */
5464        /* set vm size, must be a multiple of 4 */
5465        WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5466        WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5467        for (i = 1; i < 16; i++) {
5468                if (i < 8)
5469                        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5470                               rdev->vm_manager.saved_table_addr[i]);
5471                else
5472                        WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5473                               rdev->vm_manager.saved_table_addr[i]);
5474        }
5475
5476        /* enable context1-15 */
5477        WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5478               (u32)(rdev->dummy_page.addr >> 12));
5479        WREG32(VM_CONTEXT1_CNTL2, 4);
5480        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5481                                PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5482                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5483                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5484                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5485                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5486                                PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5487                                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5488                                VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5489                                VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5490                                READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5491                                READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5492                                WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493                                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5494
5495        if (rdev->family == CHIP_KAVERI) {
5496                u32 tmp = RREG32(CHUB_CONTROL);
5497                tmp &= ~BYPASS_VM;
5498                WREG32(CHUB_CONTROL, tmp);
5499        }
5500
5501        /* XXX SH_MEM regs */
5502        /* where to put LDS, scratch, GPUVM in FSA64 space */
5503        mutex_lock(&rdev->srbm_mutex);
5504        for (i = 0; i < 16; i++) {
5505                cik_srbm_select(rdev, 0, 0, 0, i);
5506                /* CP and shaders */
5507                WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5508                WREG32(SH_MEM_APE1_BASE, 1);
5509                WREG32(SH_MEM_APE1_LIMIT, 0);
5510                WREG32(SH_MEM_BASES, 0);
5511                /* SDMA GFX */
5512                WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5513                WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5514                WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5515                WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5516                /* XXX SDMA RLC - todo */
5517        }
5518        cik_srbm_select(rdev, 0, 0, 0, 0);
5519        mutex_unlock(&rdev->srbm_mutex);
5520
5521        cik_pcie_gart_tlb_flush(rdev);
5522        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5523                 (unsigned)(rdev->mc.gtt_size >> 20),
5524                 (unsigned long long)rdev->gart.table_addr);
5525        rdev->gart.ready = true;
5526        return 0;
5527}
5528
5529/**
5530 * cik_pcie_gart_disable - gart disable
5531 *
5532 * @rdev: radeon_device pointer
5533 *
5534 * This disables all VM page table (CIK).
5535 */
5536static void cik_pcie_gart_disable(struct radeon_device *rdev)
5537{
5538        unsigned i;
5539
5540        for (i = 1; i < 16; ++i) {
5541                uint32_t reg;
5542                if (i < 8)
5543                        reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5544                else
5545                        reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5546                rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5547        }
5548
5549        /* Disable all tables */
5550        WREG32(VM_CONTEXT0_CNTL, 0);
5551        WREG32(VM_CONTEXT1_CNTL, 0);
5552        /* Setup TLB control */
5553        WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5554               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5555        /* Setup L2 cache */
5556        WREG32(VM_L2_CNTL,
5557               ENABLE_L2_FRAGMENT_PROCESSING |
5558               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5559               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5560               EFFECTIVE_L2_QUEUE_SIZE(7) |
5561               CONTEXT1_IDENTITY_ACCESS_MODE(1));
5562        WREG32(VM_L2_CNTL2, 0);
5563        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5564               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5565        radeon_gart_table_vram_unpin(rdev);
5566}
5567
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees the GART table in vram and tears down the
 * common GART state, in that order (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware off first, then release the table and driver state */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5581
5582/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * IB checking is done by the hardware on CIK, so there is nothing to
 * parse here.
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* intentionally a nop */
	return 0;
}
5595
5596/*
5597 * vm
5598 * VMID 0 is the physical GPU addresses as used by the kernel.
5599 * VMIDs 1-15 are used for userspace clients and are handled
5600 * by the radeon vm/hsa code.
5601 */
5602/**
5603 * cik_vm_init - cik vm init callback
5604 *
5605 * @rdev: radeon_device pointer
5606 *
5607 * Inits cik specific vm parameters (number of VMs, base of vram for
5608 * VMIDs 1-15) (CIK).
5609 * Returns 0 for success.
5610 */
5611int cik_vm_init(struct radeon_device *rdev)
5612{
5613        /*
5614         * number of VMs
5615         * VMID 0 is reserved for System
5616         * radeon graphics/compute will use VMIDs 1-15
5617         */
5618        rdev->vm_manager.nvm = 16;
5619        /* base offset of vram pages */
5620        if (rdev->flags & RADEON_IS_IGP) {
5621                u64 tmp = RREG32(MC_VM_FB_OFFSET);
5622                tmp <<= 22;
5623                rdev->vm_manager.vram_base_offset = tmp;
5624        } else
5625                rdev->vm_manager.vram_base_offset = 0;
5626
5627        return 0;
5628}
5629
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Counterpart of cik_vm_init(); there is no asic specific VM state to
 * tear down, so this is empty (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do */
}
5640
5641/**
5642 * cik_vm_decode_fault - print human readable fault info
5643 *
5644 * @rdev: radeon_device pointer
5645 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5646 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5647 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5648 *
5649 * Print human readable fault information (CIK).
5650 */
5651static void cik_vm_decode_fault(struct radeon_device *rdev,
5652                                u32 status, u32 addr, u32 mc_client)
5653{
5654        u32 mc_id;
5655        u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5656        u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5657        char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5658                (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5659
5660        if (rdev->family == CHIP_HAWAII)
5661                mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5662        else
5663                mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5664
5665        printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5666               protections, vmid, addr,
5667               (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5668               block, mc_client, mc_id);
5669}
5670
5671/*
5672 * cik_vm_flush - cik vm flush using the CP
5673 *
5674 * Update the page table base and flush the VM TLB
5675 * using the CP (CIK).
5676 */
5677void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5678                  unsigned vm_id, uint64_t pd_addr)
5679{
5680        int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5681
5682        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5683        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5684                                 WRITE_DATA_DST_SEL(0)));
5685        if (vm_id < 8) {
5686                radeon_ring_write(ring,
5687                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5688        } else {
5689                radeon_ring_write(ring,
5690                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5691        }
5692        radeon_ring_write(ring, 0);
5693        radeon_ring_write(ring, pd_addr >> 12);
5694
5695        /* update SH_MEM_* regs */
5696        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5698                                 WRITE_DATA_DST_SEL(0)));
5699        radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5700        radeon_ring_write(ring, 0);
5701        radeon_ring_write(ring, VMID(vm_id));
5702
5703        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5704        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5705                                 WRITE_DATA_DST_SEL(0)));
5706        radeon_ring_write(ring, SH_MEM_BASES >> 2);
5707        radeon_ring_write(ring, 0);
5708
5709        radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5710        radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5711        radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5712        radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5713
5714        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5715        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5716                                 WRITE_DATA_DST_SEL(0)));
5717        radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5718        radeon_ring_write(ring, 0);
5719        radeon_ring_write(ring, VMID(0));
5720
5721        /* HDP flush */
5722        cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5723
5724        /* bits 0-15 are the VM contexts0-15 */
5725        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5726        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5727                                 WRITE_DATA_DST_SEL(0)));
5728        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5729        radeon_ring_write(ring, 0);
5730        radeon_ring_write(ring, 1 << vm_id);
5731
5732        /* wait for the invalidate to complete */
5733        radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5734        radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5735                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5736                                 WAIT_REG_MEM_ENGINE(0))); /* me */
5737        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5738        radeon_ring_write(ring, 0);
5739        radeon_ring_write(ring, 0); /* ref */
5740        radeon_ring_write(ring, 0); /* mask */
5741        radeon_ring_write(ring, 0x20); /* poll interval */
5742
5743        /* compute doesn't have PFP */
5744        if (usepfp) {
5745                /* sync PFP to ME, otherwise we might get invalid PFP reads */
5746                radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5747                radeon_ring_write(ring, 0x0);
5748        }
5749}
5750
5751/*
5752 * RLC
5753 * The RLC is a multi-purpose microengine that handles a
5754 * variety of functions, the most important of which is
5755 * the interrupt controller.
5756 */
5757static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5758                                          bool enable)
5759{
5760        u32 tmp = RREG32(CP_INT_CNTL_RING0);
5761
5762        if (enable)
5763                tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5764        else
5765                tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5766        WREG32(CP_INT_CNTL_RING0, tmp);
5767}
5768
5769static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5770{
5771        u32 tmp;
5772
5773        tmp = RREG32(RLC_LB_CNTL);
5774        if (enable)
5775                tmp |= LOAD_BALANCE_ENABLE;
5776        else
5777                tmp &= ~LOAD_BALANCE_ENABLE;
5778        WREG32(RLC_LB_CNTL, tmp);
5779}
5780
5781static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5782{
5783        u32 i, j, k;
5784        u32 mask;
5785
5786        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5787                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5788                        cik_select_se_sh(rdev, i, j);
5789                        for (k = 0; k < rdev->usec_timeout; k++) {
5790                                if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5791                                        break;
5792                                udelay(1);
5793                        }
5794                }
5795        }
5796        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5797
5798        mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5799        for (k = 0; k < rdev->usec_timeout; k++) {
5800                if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5801                        break;
5802                udelay(1);
5803        }
5804}
5805
5806static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5807{
5808        u32 tmp;
5809
5810        tmp = RREG32(RLC_CNTL);
5811        if (tmp != rlc)
5812                WREG32(RLC_CNTL, rlc);
5813}
5814
5815static u32 cik_halt_rlc(struct radeon_device *rdev)
5816{
5817        u32 data, orig;
5818
5819        orig = data = RREG32(RLC_CNTL);
5820
5821        if (data & RLC_ENABLE) {
5822                u32 i;
5823
5824                data &= ~RLC_ENABLE;
5825                WREG32(RLC_CNTL, data);
5826
5827                for (i = 0; i < rdev->usec_timeout; i++) {
5828                        if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5829                                break;
5830                        udelay(1);
5831                }
5832
5833                cik_wait_for_rlc_serdes(rdev);
5834        }
5835
5836        return orig;
5837}
5838
5839void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5840{
5841        u32 tmp, i, mask;
5842
5843        tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5844        WREG32(RLC_GPR_REG2, tmp);
5845
5846        mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5847        for (i = 0; i < rdev->usec_timeout; i++) {
5848                if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5849                        break;
5850                udelay(1);
5851        }
5852
5853        for (i = 0; i < rdev->usec_timeout; i++) {
5854                if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5855                        break;
5856                udelay(1);
5857        }
5858}
5859
5860void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5861{
5862        u32 tmp;
5863
5864        tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5865        WREG32(RLC_GPR_REG2, tmp);
5866}
5867
5868/**
5869 * cik_rlc_stop - stop the RLC ME
5870 *
5871 * @rdev: radeon_device pointer
5872 *
5873 * Halt the RLC ME (MicroEngine) (CIK).
5874 */
5875static void cik_rlc_stop(struct radeon_device *rdev)
5876{
5877        WREG32(RLC_CNTL, 0);
5878
5879        cik_enable_gui_idle_interrupt(rdev, false);
5880
5881        cik_wait_for_rlc_serdes(rdev);
5882}
5883
5884/**
5885 * cik_rlc_start - start the RLC ME
5886 *
5887 * @rdev: radeon_device pointer
5888 *
5889 * Unhalt the RLC ME (MicroEngine) (CIK).
5890 */
5891static void cik_rlc_start(struct radeon_device *rdev)
5892{
5893        WREG32(RLC_CNTL, RLC_ENABLE);
5894
5895        cik_enable_gui_idle_interrupt(rdev, true);
5896
5897        udelay(50);
5898}
5899
5900/**
5901 * cik_rlc_resume - setup the RLC hw
5902 *
5903 * @rdev: radeon_device pointer
5904 *
5905 * Initialize the RLC registers, load the ucode,
5906 * and start the RLC (CIK).
5907 * Returns 0 for success, -EINVAL if the ucode is not available.
5908 */
5909static int cik_rlc_resume(struct radeon_device *rdev)
5910{
5911        u32 i, size, tmp;
5912
5913        if (!rdev->rlc_fw)
5914                return -EINVAL;
5915
5916        cik_rlc_stop(rdev);
5917
5918        /* disable CG */
5919        tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5920        WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5921
5922        si_rlc_reset(rdev);
5923
5924        cik_init_pg(rdev);
5925
5926        cik_init_cg(rdev);
5927
5928        WREG32(RLC_LB_CNTR_INIT, 0);
5929        WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5930
5931        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5932        WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5933        WREG32(RLC_LB_PARAMS, 0x00600408);
5934        WREG32(RLC_LB_CNTL, 0x80000004);
5935
5936        WREG32(RLC_MC_CNTL, 0);
5937        WREG32(RLC_UCODE_CNTL, 0);
5938
5939        if (rdev->new_fw) {
5940                const struct rlc_firmware_header_v1_0 *hdr =
5941                        (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5942                const __le32 *fw_data = (const __le32 *)
5943                        (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5944
5945                radeon_ucode_print_rlc_hdr(&hdr->header);
5946
5947                size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5948                WREG32(RLC_GPM_UCODE_ADDR, 0);
5949                for (i = 0; i < size; i++)
5950                        WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5951                WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5952        } else {
5953                const __be32 *fw_data;
5954
5955                switch (rdev->family) {
5956                case CHIP_BONAIRE:
5957                case CHIP_HAWAII:
5958                default:
5959                        size = BONAIRE_RLC_UCODE_SIZE;
5960                        break;
5961                case CHIP_KAVERI:
5962                        size = KV_RLC_UCODE_SIZE;
5963                        break;
5964                case CHIP_KABINI:
5965                        size = KB_RLC_UCODE_SIZE;
5966                        break;
5967                case CHIP_MULLINS:
5968                        size = ML_RLC_UCODE_SIZE;
5969                        break;
5970                }
5971
5972                fw_data = (const __be32 *)rdev->rlc_fw->data;
5973                WREG32(RLC_GPM_UCODE_ADDR, 0);
5974                for (i = 0; i < size; i++)
5975                        WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5976                WREG32(RLC_GPM_UCODE_ADDR, 0);
5977        }
5978
5979        /* XXX - find out what chips support lbpw */
5980        cik_enable_lbpw(rdev, false);
5981
5982        if (rdev->family == CHIP_BONAIRE)
5983                WREG32(RLC_DRIVER_DMA_STATUS, 0);
5984
5985        cik_rlc_start(rdev);
5986
5987        return 0;
5988}
5989
5990static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5991{
5992        u32 data, orig, tmp, tmp2;
5993
5994        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5995
5996        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5997                cik_enable_gui_idle_interrupt(rdev, true);
5998
5999                tmp = cik_halt_rlc(rdev);
6000
6001                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6002                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6003                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6004                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6005                WREG32(RLC_SERDES_WR_CTRL, tmp2);
6006
6007                cik_update_rlc(rdev, tmp);
6008
6009                data |= CGCG_EN | CGLS_EN;
6010        } else {
6011                cik_enable_gui_idle_interrupt(rdev, false);
6012
6013                RREG32(CB_CGTT_SCLK_CTRL);
6014                RREG32(CB_CGTT_SCLK_CTRL);
6015                RREG32(CB_CGTT_SCLK_CTRL);
6016                RREG32(CB_CGTT_SCLK_CTRL);
6017
6018                data &= ~(CGCG_EN | CGLS_EN);
6019        }
6020
6021        if (orig != data)
6022                WREG32(RLC_CGCG_CGLS_CTRL, data);
6023
6024}
6025
6026static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6027{
6028        u32 data, orig, tmp = 0;
6029
6030        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6031                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6032                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6033                                orig = data = RREG32(CP_MEM_SLP_CNTL);
6034                                data |= CP_MEM_LS_EN;
6035                                if (orig != data)
6036                                        WREG32(CP_MEM_SLP_CNTL, data);
6037                        }
6038                }
6039
6040                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6041                data |= 0x00000001;
6042                data &= 0xfffffffd;
6043                if (orig != data)
6044                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6045
6046                tmp = cik_halt_rlc(rdev);
6047
6048                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6049                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6050                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6051                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6052                WREG32(RLC_SERDES_WR_CTRL, data);
6053
6054                cik_update_rlc(rdev, tmp);
6055
6056                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6057                        orig = data = RREG32(CGTS_SM_CTRL_REG);
6058                        data &= ~SM_MODE_MASK;
6059                        data |= SM_MODE(0x2);
6060                        data |= SM_MODE_ENABLE;
6061                        data &= ~CGTS_OVERRIDE;
6062                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6063                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6064                                data &= ~CGTS_LS_OVERRIDE;
6065                        data &= ~ON_MONITOR_ADD_MASK;
6066                        data |= ON_MONITOR_ADD_EN;
6067                        data |= ON_MONITOR_ADD(0x96);
6068                        if (orig != data)
6069                                WREG32(CGTS_SM_CTRL_REG, data);
6070                }
6071        } else {
6072                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6073                data |= 0x00000003;
6074                if (orig != data)
6075                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6076
6077                data = RREG32(RLC_MEM_SLP_CNTL);
6078                if (data & RLC_MEM_LS_EN) {
6079                        data &= ~RLC_MEM_LS_EN;
6080                        WREG32(RLC_MEM_SLP_CNTL, data);
6081                }
6082
6083                data = RREG32(CP_MEM_SLP_CNTL);
6084                if (data & CP_MEM_LS_EN) {
6085                        data &= ~CP_MEM_LS_EN;
6086                        WREG32(CP_MEM_SLP_CNTL, data);
6087                }
6088
6089                orig = data = RREG32(CGTS_SM_CTRL_REG);
6090                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6091                if (orig != data)
6092                        WREG32(CGTS_SM_CTRL_REG, data);
6093
6094                tmp = cik_halt_rlc(rdev);
6095
6096                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6097                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6098                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6099                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6100                WREG32(RLC_SERDES_WR_CTRL, data);
6101
6102                cik_update_rlc(rdev, tmp);
6103        }
6104}
6105
6106static const u32 mc_cg_registers[] =
6107{
6108        MC_HUB_MISC_HUB_CG,
6109        MC_HUB_MISC_SIP_CG,
6110        MC_HUB_MISC_VM_CG,
6111        MC_XPB_CLK_GAT,
6112        ATC_MISC_CG,
6113        MC_CITF_MISC_WR_CG,
6114        MC_CITF_MISC_RD_CG,
6115        MC_CITF_MISC_VM_CG,
6116        VM_L2_CG,
6117};
6118
6119static void cik_enable_mc_ls(struct radeon_device *rdev,
6120                             bool enable)
6121{
6122        int i;
6123        u32 orig, data;
6124
6125        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6126                orig = data = RREG32(mc_cg_registers[i]);
6127                if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6128                        data |= MC_LS_ENABLE;
6129                else
6130                        data &= ~MC_LS_ENABLE;
6131                if (data != orig)
6132                        WREG32(mc_cg_registers[i], data);
6133        }
6134}
6135
6136static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6137                               bool enable)
6138{
6139        int i;
6140        u32 orig, data;
6141
6142        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6143                orig = data = RREG32(mc_cg_registers[i]);
6144                if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6145                        data |= MC_CG_ENABLE;
6146                else
6147                        data &= ~MC_CG_ENABLE;
6148                if (data != orig)
6149                        WREG32(mc_cg_registers[i], data);
6150        }
6151}
6152
6153static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6154                                 bool enable)
6155{
6156        u32 orig, data;
6157
6158        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6159                WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6160                WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6161        } else {
6162                orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6163                data |= 0xff000000;
6164                if (data != orig)
6165                        WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6166
6167                orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6168                data |= 0xff000000;
6169                if (data != orig)
6170                        WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6171        }
6172}
6173
6174static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6175                                 bool enable)
6176{
6177        u32 orig, data;
6178
6179        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6180                orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6181                data |= 0x100;
6182                if (orig != data)
6183                        WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184
6185                orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6186                data |= 0x100;
6187                if (orig != data)
6188                        WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6189        } else {
6190                orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6191                data &= ~0x100;
6192                if (orig != data)
6193                        WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6194
6195                orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6196                data &= ~0x100;
6197                if (orig != data)
6198                        WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6199        }
6200}
6201
6202static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6203                                bool enable)
6204{
6205        u32 orig, data;
6206
6207        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6208                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6209                data = 0xfff;
6210                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6211
6212                orig = data = RREG32(UVD_CGC_CTRL);
6213                data |= DCM;
6214                if (orig != data)
6215                        WREG32(UVD_CGC_CTRL, data);
6216        } else {
6217                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6218                data &= ~0xfff;
6219                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6220
6221                orig = data = RREG32(UVD_CGC_CTRL);
6222                data &= ~DCM;
6223                if (orig != data)
6224                        WREG32(UVD_CGC_CTRL, data);
6225        }
6226}
6227
6228static void cik_enable_bif_mgls(struct radeon_device *rdev,
6229                               bool enable)
6230{
6231        u32 orig, data;
6232
6233        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6234
6235        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6236                data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6237                        REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6238        else
6239                data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6240                          REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6241
6242        if (orig != data)
6243                WREG32_PCIE_PORT(PCIE_CNTL2, data);
6244}
6245
6246static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6247                                bool enable)
6248{
6249        u32 orig, data;
6250
6251        orig = data = RREG32(HDP_HOST_PATH_CNTL);
6252
6253        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6254                data &= ~CLOCK_GATING_DIS;
6255        else
6256                data |= CLOCK_GATING_DIS;
6257
6258        if (orig != data)
6259                WREG32(HDP_HOST_PATH_CNTL, data);
6260}
6261
6262static void cik_enable_hdp_ls(struct radeon_device *rdev,
6263                              bool enable)
6264{
6265        u32 orig, data;
6266
6267        orig = data = RREG32(HDP_MEM_POWER_LS);
6268
6269        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6270                data |= HDP_LS_ENABLE;
6271        else
6272                data &= ~HDP_LS_ENABLE;
6273
6274        if (orig != data)
6275                WREG32(HDP_MEM_POWER_LS, data);
6276}
6277
6278void cik_update_cg(struct radeon_device *rdev,
6279                   u32 block, bool enable)
6280{
6281
6282        if (block & RADEON_CG_BLOCK_GFX) {
6283                cik_enable_gui_idle_interrupt(rdev, false);
6284                /* order matters! */
6285                if (enable) {
6286                        cik_enable_mgcg(rdev, true);
6287                        cik_enable_cgcg(rdev, true);
6288                } else {
6289                        cik_enable_cgcg(rdev, false);
6290                        cik_enable_mgcg(rdev, false);
6291                }
6292                cik_enable_gui_idle_interrupt(rdev, true);
6293        }
6294
6295        if (block & RADEON_CG_BLOCK_MC) {
6296                if (!(rdev->flags & RADEON_IS_IGP)) {
6297                        cik_enable_mc_mgcg(rdev, enable);
6298                        cik_enable_mc_ls(rdev, enable);
6299                }
6300        }
6301
6302        if (block & RADEON_CG_BLOCK_SDMA) {
6303                cik_enable_sdma_mgcg(rdev, enable);
6304                cik_enable_sdma_mgls(rdev, enable);
6305        }
6306
6307        if (block & RADEON_CG_BLOCK_BIF) {
6308                cik_enable_bif_mgls(rdev, enable);
6309        }
6310
6311        if (block & RADEON_CG_BLOCK_UVD) {
6312                if (rdev->has_uvd)
6313                        cik_enable_uvd_mgcg(rdev, enable);
6314        }
6315
6316        if (block & RADEON_CG_BLOCK_HDP) {
6317                cik_enable_hdp_mgcg(rdev, enable);
6318                cik_enable_hdp_ls(rdev, enable);
6319        }
6320
6321        if (block & RADEON_CG_BLOCK_VCE) {
6322                vce_v2_0_enable_mgcg(rdev, enable);
6323        }
6324}
6325
6326static void cik_init_cg(struct radeon_device *rdev)
6327{
6328
6329        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6330
6331        if (rdev->has_uvd)
6332                si_init_uvd_internal_cg(rdev);
6333
6334        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6335                             RADEON_CG_BLOCK_SDMA |
6336                             RADEON_CG_BLOCK_BIF |
6337                             RADEON_CG_BLOCK_UVD |
6338                             RADEON_CG_BLOCK_HDP), true);
6339}
6340
6341static void cik_fini_cg(struct radeon_device *rdev)
6342{
6343        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6344                             RADEON_CG_BLOCK_SDMA |
6345                             RADEON_CG_BLOCK_BIF |
6346                             RADEON_CG_BLOCK_UVD |
6347                             RADEON_CG_BLOCK_HDP), false);
6348
6349        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6350}
6351
6352static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6353                                          bool enable)
6354{
6355        u32 data, orig;
6356
6357        orig = data = RREG32(RLC_PG_CNTL);
6358        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6359                data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6360        else
6361                data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6362        if (orig != data)
6363                WREG32(RLC_PG_CNTL, data);
6364}
6365
6366static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6367                                          bool enable)
6368{
6369        u32 data, orig;
6370
6371        orig = data = RREG32(RLC_PG_CNTL);
6372        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6373                data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6374        else
6375                data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6376        if (orig != data)
6377                WREG32(RLC_PG_CNTL, data);
6378}
6379
6380static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6381{
6382        u32 data, orig;
6383
6384        orig = data = RREG32(RLC_PG_CNTL);
6385        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6386                data &= ~DISABLE_CP_PG;
6387        else
6388                data |= DISABLE_CP_PG;
6389        if (orig != data)
6390                WREG32(RLC_PG_CNTL, data);
6391}
6392
6393static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6394{
6395        u32 data, orig;
6396
6397        orig = data = RREG32(RLC_PG_CNTL);
6398        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6399                data &= ~DISABLE_GDS_PG;
6400        else
6401                data |= DISABLE_GDS_PG;
6402        if (orig != data)
6403                WREG32(RLC_PG_CNTL, data);
6404}
6405
6406#define CP_ME_TABLE_SIZE    96
6407#define CP_ME_TABLE_OFFSET  2048
6408#define CP_MEC_TABLE_OFFSET 4096
6409
6410void cik_init_cp_pg_table(struct radeon_device *rdev)
6411{
6412        volatile u32 *dst_ptr;
6413        int me, i, max_me = 4;
6414        u32 bo_offset = 0;
6415        u32 table_offset, table_size;
6416
6417        if (rdev->family == CHIP_KAVERI)
6418                max_me = 5;
6419
6420        if (rdev->rlc.cp_table_ptr == NULL)
6421                return;
6422
6423        /* write the cp table buffer */
6424        dst_ptr = rdev->rlc.cp_table_ptr;
6425        for (me = 0; me < max_me; me++) {
6426                if (rdev->new_fw) {
6427                        const __le32 *fw_data;
6428                        const struct gfx_firmware_header_v1_0 *hdr;
6429
6430                        if (me == 0) {
6431                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6432                                fw_data = (const __le32 *)
6433                                        (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6434                                table_offset = le32_to_cpu(hdr->jt_offset);
6435                                table_size = le32_to_cpu(hdr->jt_size);
6436                        } else if (me == 1) {
6437                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6438                                fw_data = (const __le32 *)
6439                                        (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6440                                table_offset = le32_to_cpu(hdr->jt_offset);
6441                                table_size = le32_to_cpu(hdr->jt_size);
6442                        } else if (me == 2) {
6443                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6444                                fw_data = (const __le32 *)
6445                                        (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6446                                table_offset = le32_to_cpu(hdr->jt_offset);
6447                                table_size = le32_to_cpu(hdr->jt_size);
6448                        } else if (me == 3) {
6449                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6450                                fw_data = (const __le32 *)
6451                                        (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6452                                table_offset = le32_to_cpu(hdr->jt_offset);
6453                                table_size = le32_to_cpu(hdr->jt_size);
6454                        } else {
6455                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6456                                fw_data = (const __le32 *)
6457                                        (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6458                                table_offset = le32_to_cpu(hdr->jt_offset);
6459                                table_size = le32_to_cpu(hdr->jt_size);
6460                        }
6461
6462                        for (i = 0; i < table_size; i ++) {
6463                                dst_ptr[bo_offset + i] =
6464                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6465                        }
6466                        bo_offset += table_size;
6467                } else {
6468                        const __be32 *fw_data;
6469                        table_size = CP_ME_TABLE_SIZE;
6470
6471                        if (me == 0) {
6472                                fw_data = (const __be32 *)rdev->ce_fw->data;
6473                                table_offset = CP_ME_TABLE_OFFSET;
6474                        } else if (me == 1) {
6475                                fw_data = (const __be32 *)rdev->pfp_fw->data;
6476                                table_offset = CP_ME_TABLE_OFFSET;
6477                        } else if (me == 2) {
6478                                fw_data = (const __be32 *)rdev->me_fw->data;
6479                                table_offset = CP_ME_TABLE_OFFSET;
6480                        } else {
6481                                fw_data = (const __be32 *)rdev->mec_fw->data;
6482                                table_offset = CP_MEC_TABLE_OFFSET;
6483                        }
6484
6485                        for (i = 0; i < table_size; i ++) {
6486                                dst_ptr[bo_offset + i] =
6487                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6488                        }
6489                        bo_offset += table_size;
6490                }
6491        }
6492}
6493
6494static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6495                                bool enable)
6496{
6497        u32 data, orig;
6498
6499        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6500                orig = data = RREG32(RLC_PG_CNTL);
6501                data |= GFX_PG_ENABLE;
6502                if (orig != data)
6503                        WREG32(RLC_PG_CNTL, data);
6504
6505                orig = data = RREG32(RLC_AUTO_PG_CTRL);
6506                data |= AUTO_PG_EN;
6507                if (orig != data)
6508                        WREG32(RLC_AUTO_PG_CTRL, data);
6509        } else {
6510                orig = data = RREG32(RLC_PG_CNTL);
6511                data &= ~GFX_PG_ENABLE;
6512                if (orig != data)
6513                        WREG32(RLC_PG_CNTL, data);
6514
6515                orig = data = RREG32(RLC_AUTO_PG_CTRL);
6516                data &= ~AUTO_PG_EN;
6517                if (orig != data)
6518                        WREG32(RLC_AUTO_PG_CTRL, data);
6519
6520                data = RREG32(DB_RENDER_CONTROL);
6521        }
6522}
6523
6524static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6525{
6526        u32 mask = 0, tmp, tmp1;
6527        int i;
6528
6529        cik_select_se_sh(rdev, se, sh);
6530        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6531        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6532        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6533
6534        tmp &= 0xffff0000;
6535
6536        tmp |= tmp1;
6537        tmp >>= 16;
6538
6539        for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6540                mask <<= 1;
6541                mask |= 1;
6542        }
6543
6544        return (~tmp) & mask;
6545}
6546
6547static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6548{
6549        u32 i, j, k, active_cu_number = 0;
6550        u32 mask, counter, cu_bitmap;
6551        u32 tmp = 0;
6552
6553        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6554                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6555                        mask = 1;
6556                        cu_bitmap = 0;
6557                        counter = 0;
6558                        for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6559                                if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6560                                        if (counter < 2)
6561                                                cu_bitmap |= mask;
6562                                        counter ++;
6563                                }
6564                                mask <<= 1;
6565                        }
6566
6567                        active_cu_number += counter;
6568                        tmp |= (cu_bitmap << (i * 16 + j * 8));
6569                }
6570        }
6571
6572        WREG32(RLC_PG_AO_CU_MASK, tmp);
6573
6574        tmp = RREG32(RLC_MAX_PG_CU);
6575        tmp &= ~MAX_PU_CU_MASK;
6576        tmp |= MAX_PU_CU(active_cu_number);
6577        WREG32(RLC_MAX_PG_CU, tmp);
6578}
6579
6580static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6581                                       bool enable)
6582{
6583        u32 data, orig;
6584
6585        orig = data = RREG32(RLC_PG_CNTL);
6586        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6587                data |= STATIC_PER_CU_PG_ENABLE;
6588        else
6589                data &= ~STATIC_PER_CU_PG_ENABLE;
6590        if (orig != data)
6591                WREG32(RLC_PG_CNTL, data);
6592}
6593
6594static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6595                                        bool enable)
6596{
6597        u32 data, orig;
6598
6599        orig = data = RREG32(RLC_PG_CNTL);
6600        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6601                data |= DYN_PER_CU_PG_ENABLE;
6602        else
6603                data &= ~DYN_PER_CU_PG_ENABLE;
6604        if (orig != data)
6605                WREG32(RLC_PG_CNTL, data);
6606}
6607
6608#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6609#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6610
6611static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6612{
6613        u32 data, orig;
6614        u32 i;
6615
6616        if (rdev->rlc.cs_data) {
6617                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6618                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6619                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6620                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6621        } else {
6622                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6623                for (i = 0; i < 3; i++)
6624                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
6625        }
6626        if (rdev->rlc.reg_list) {
6627                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6628                for (i = 0; i < rdev->rlc.reg_list_size; i++)
6629                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6630        }
6631
6632        orig = data = RREG32(RLC_PG_CNTL);
6633        data |= GFX_PG_SRC;
6634        if (orig != data)
6635                WREG32(RLC_PG_CNTL, data);
6636
6637        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6638        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6639
6640        data = RREG32(CP_RB_WPTR_POLL_CNTL);
6641        data &= ~IDLE_POLL_COUNT_MASK;
6642        data |= IDLE_POLL_COUNT(0x60);
6643        WREG32(CP_RB_WPTR_POLL_CNTL, data);
6644
6645        data = 0x10101010;
6646        WREG32(RLC_PG_DELAY, data);
6647
6648        data = RREG32(RLC_PG_DELAY_2);
6649        data &= ~0xff;
6650        data |= 0x3;
6651        WREG32(RLC_PG_DELAY_2, data);
6652
6653        data = RREG32(RLC_AUTO_PG_CTRL);
6654        data &= ~GRBM_REG_SGIT_MASK;
6655        data |= GRBM_REG_SGIT(0x700);
6656        WREG32(RLC_AUTO_PG_CTRL, data);
6657
6658}
6659
6660static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6661{
6662        cik_enable_gfx_cgpg(rdev, enable);
6663        cik_enable_gfx_static_mgpg(rdev, enable);
6664        cik_enable_gfx_dynamic_mgpg(rdev, enable);
6665}
6666
6667u32 cik_get_csb_size(struct radeon_device *rdev)
6668{
6669        u32 count = 0;
6670        const struct cs_section_def *sect = NULL;
6671        const struct cs_extent_def *ext = NULL;
6672
6673        if (rdev->rlc.cs_data == NULL)
6674                return 0;
6675
6676        /* begin clear state */
6677        count += 2;
6678        /* context control state */
6679        count += 3;
6680
6681        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6682                for (ext = sect->section; ext->extent != NULL; ++ext) {
6683                        if (sect->id == SECT_CONTEXT)
6684                                count += 2 + ext->reg_count;
6685                        else
6686                                return 0;
6687                }
6688        }
6689        /* pa_sc_raster_config/pa_sc_raster_config1 */
6690        count += 4;
6691        /* end clear state */
6692        count += 2;
6693        /* clear state */
6694        count += 2;
6695
6696        return count;
6697}
6698
6699void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6700{
6701        u32 count = 0, i;
6702        const struct cs_section_def *sect = NULL;
6703        const struct cs_extent_def *ext = NULL;
6704
6705        if (rdev->rlc.cs_data == NULL)
6706                return;
6707        if (buffer == NULL)
6708                return;
6709
6710        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6711        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6712
6713        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6714        buffer[count++] = cpu_to_le32(0x80000000);
6715        buffer[count++] = cpu_to_le32(0x80000000);
6716
6717        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6718                for (ext = sect->section; ext->extent != NULL; ++ext) {
6719                        if (sect->id == SECT_CONTEXT) {
6720                                buffer[count++] =
6721                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6722                                buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6723                                for (i = 0; i < ext->reg_count; i++)
6724                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
6725                        } else {
6726                                return;
6727                        }
6728                }
6729        }
6730
6731        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6732        buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6733        switch (rdev->family) {
6734        case CHIP_BONAIRE:
6735                buffer[count++] = cpu_to_le32(0x16000012);
6736                buffer[count++] = cpu_to_le32(0x00000000);
6737                break;
6738        case CHIP_KAVERI:
6739                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6740                buffer[count++] = cpu_to_le32(0x00000000);
6741                break;
6742        case CHIP_KABINI:
6743        case CHIP_MULLINS:
6744                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6745                buffer[count++] = cpu_to_le32(0x00000000);
6746                break;
6747        case CHIP_HAWAII:
6748                buffer[count++] = cpu_to_le32(0x3a00161a);
6749                buffer[count++] = cpu_to_le32(0x0000002e);
6750                break;
6751        default:
6752                buffer[count++] = cpu_to_le32(0x00000000);
6753                buffer[count++] = cpu_to_le32(0x00000000);
6754                break;
6755        }
6756
6757        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6758        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6759
6760        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6761        buffer[count++] = cpu_to_le32(0);
6762}
6763
6764static void cik_init_pg(struct radeon_device *rdev)
6765{
6766        if (rdev->pg_flags) {
6767                cik_enable_sck_slowdown_on_pu(rdev, true);
6768                cik_enable_sck_slowdown_on_pd(rdev, true);
6769                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6770                        cik_init_gfx_cgpg(rdev);
6771                        cik_enable_cp_pg(rdev, true);
6772                        cik_enable_gds_pg(rdev, true);
6773                }
6774                cik_init_ao_cu_mask(rdev);
6775                cik_update_gfx_pg(rdev, true);
6776        }
6777}
6778
6779static void cik_fini_pg(struct radeon_device *rdev)
6780{
6781        if (rdev->pg_flags) {
6782                cik_update_gfx_pg(rdev, false);
6783                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6784                        cik_enable_cp_pg(rdev, false);
6785                        cik_enable_gds_pg(rdev, false);
6786                }
6787        }
6788}
6789
6790/*
6791 * Interrupts
6792 * Starting with r6xx, interrupts are handled via a ring buffer.
6793 * Ring buffers are areas of GPU accessible memory that the GPU
6794 * writes interrupt vectors into and the host reads vectors out of.
6795 * There is a rptr (read pointer) that determines where the
6796 * host is currently reading, and a wptr (write pointer)
6797 * which determines where the GPU has written.  When the
6798 * pointers are equal, the ring is idle.  When the GPU
6799 * writes vectors to the ring buffer, it increments the
6800 * wptr.  When there is an interrupt, the host then starts
6801 * fetching commands and processing them until the pointers are
6802 * equal again at which point it updates the rptr.
6803 */
6804
6805/**
6806 * cik_enable_interrupts - Enable the interrupt ring buffer
6807 *
6808 * @rdev: radeon_device pointer
6809 *
6810 * Enable the interrupt ring buffer (CIK).
6811 */
6812static void cik_enable_interrupts(struct radeon_device *rdev)
6813{
6814        u32 ih_cntl = RREG32(IH_CNTL);
6815        u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6816
6817        ih_cntl |= ENABLE_INTR;
6818        ih_rb_cntl |= IH_RB_ENABLE;
6819        WREG32(IH_CNTL, ih_cntl);
6820        WREG32(IH_RB_CNTL, ih_rb_cntl);
6821        rdev->ih.enabled = true;
6822}
6823
6824/**
6825 * cik_disable_interrupts - Disable the interrupt ring buffer
6826 *
6827 * @rdev: radeon_device pointer
6828 *
6829 * Disable the interrupt ring buffer (CIK).
6830 */
6831static void cik_disable_interrupts(struct radeon_device *rdev)
6832{
6833        u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6834        u32 ih_cntl = RREG32(IH_CNTL);
6835
6836        ih_rb_cntl &= ~IH_RB_ENABLE;
6837        ih_cntl &= ~ENABLE_INTR;
6838        WREG32(IH_RB_CNTL, ih_rb_cntl);
6839        WREG32(IH_CNTL, ih_cntl);
6840        /* set rptr, wptr to 0 */
6841        WREG32(IH_RB_RPTR, 0);
6842        WREG32(IH_RB_WPTR, 0);
6843        rdev->ih.enabled = false;
6844        rdev->ih.rptr = 0;
6845}
6846
6847/**
6848 * cik_disable_interrupt_state - Disable all interrupt sources
6849 *
6850 * @rdev: radeon_device pointer
6851 *
6852 * Clear all interrupt enable bits used by the driver (CIK).
6853 */
6854static void cik_disable_interrupt_state(struct radeon_device *rdev)
6855{
6856        u32 tmp;
6857
6858        /* gfx ring */
6859        tmp = RREG32(CP_INT_CNTL_RING0) &
6860                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6861        WREG32(CP_INT_CNTL_RING0, tmp);
6862        /* sdma */
6863        tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6864        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6865        tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6866        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6867        /* compute queues */
6868        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6869        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6870        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6871        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6872        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6873        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6874        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6875        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6876        /* grbm */
6877        WREG32(GRBM_INT_CNTL, 0);
6878        /* SRBM */
6879        WREG32(SRBM_INT_CNTL, 0);
6880        /* vline/vblank, etc. */
6881        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6882        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6883        if (rdev->num_crtc >= 4) {
6884                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6885                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6886        }
6887        if (rdev->num_crtc >= 6) {
6888                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6889                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6890        }
6891        /* pflip */
6892        if (rdev->num_crtc >= 2) {
6893                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6894                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6895        }
6896        if (rdev->num_crtc >= 4) {
6897                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6898                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6899        }
6900        if (rdev->num_crtc >= 6) {
6901                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6902                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6903        }
6904
6905        /* dac hotplug */
6906        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6907
6908        /* digital hotplug */
6909        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6910        WREG32(DC_HPD1_INT_CONTROL, tmp);
6911        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6912        WREG32(DC_HPD2_INT_CONTROL, tmp);
6913        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6914        WREG32(DC_HPD3_INT_CONTROL, tmp);
6915        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6916        WREG32(DC_HPD4_INT_CONTROL, tmp);
6917        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6918        WREG32(DC_HPD5_INT_CONTROL, tmp);
6919        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6920        WREG32(DC_HPD6_INT_CONTROL, tmp);
6921
6922}
6923
6924/**
6925 * cik_irq_init - init and enable the interrupt ring
6926 *
6927 * @rdev: radeon_device pointer
6928 *
6929 * Allocate a ring buffer for the interrupt controller,
6930 * enable the RLC, disable interrupts, enable the IH
6931 * ring buffer and enable it (CIK).
6932 * Called at device load and reume.
6933 * Returns 0 for success, errors for failure.
6934 */
6935static int cik_irq_init(struct radeon_device *rdev)
6936{
6937        int ret = 0;
6938        int rb_bufsz;
6939        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6940
6941        /* allocate ring */
6942        ret = r600_ih_ring_alloc(rdev);
6943        if (ret)
6944                return ret;
6945
6946        /* disable irqs */
6947        cik_disable_interrupts(rdev);
6948
6949        /* init rlc */
6950        ret = cik_rlc_resume(rdev);
6951        if (ret) {
6952                r600_ih_ring_fini(rdev);
6953                return ret;
6954        }
6955
6956        /* setup interrupt control */
6957        /* set dummy read address to dummy page address */
6958        WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6959        interrupt_cntl = RREG32(INTERRUPT_CNTL);
6960        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6961         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6962         */
6963        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6964        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6965        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6966        WREG32(INTERRUPT_CNTL, interrupt_cntl);
6967
6968        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6969        rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6970
6971        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6972                      IH_WPTR_OVERFLOW_CLEAR |
6973                      (rb_bufsz << 1));
6974
6975        if (rdev->wb.enabled)
6976                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6977
6978        /* set the writeback address whether it's enabled or not */
6979        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6980        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6981
6982        WREG32(IH_RB_CNTL, ih_rb_cntl);
6983
6984        /* set rptr, wptr to 0 */
6985        WREG32(IH_RB_RPTR, 0);
6986        WREG32(IH_RB_WPTR, 0);
6987
6988        /* Default settings for IH_CNTL (disabled at first) */
6989        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6990        /* RPTR_REARM only works if msi's are enabled */
6991        if (rdev->msi_enabled)
6992                ih_cntl |= RPTR_REARM;
6993        WREG32(IH_CNTL, ih_cntl);
6994
6995        /* force the active interrupt state to all disabled */
6996        cik_disable_interrupt_state(rdev);
6997
6998        pci_set_master(rdev->pdev);
6999
7000        /* enable irqs */
7001        cik_enable_interrupts(rdev);
7002
7003        return ret;
7004}
7005
7006/**
7007 * cik_irq_set - enable/disable interrupt sources
7008 *
7009 * @rdev: radeon_device pointer
7010 *
7011 * Enable interrupt sources on the GPU (vblanks, hpd,
7012 * etc.) (CIK).
7013 * Returns 0 for success, errors for failure.
7014 */
7015int cik_irq_set(struct radeon_device *rdev)
7016{
7017        u32 cp_int_cntl;
7018        u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7019        u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7020        u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7021        u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7022        u32 grbm_int_cntl = 0;
7023        u32 dma_cntl, dma_cntl1;
7024
7025        if (!rdev->irq.installed) {
7026                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7027                return -EINVAL;
7028        }
7029        /* don't enable anything if the ih is disabled */
7030        if (!rdev->ih.enabled) {
7031                cik_disable_interrupts(rdev);
7032                /* force the active interrupt state to all disabled */
7033                cik_disable_interrupt_state(rdev);
7034                return 0;
7035        }
7036
7037        cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7038                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7039        cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7040
7041        hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7042        hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7043        hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7044        hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7045        hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7046        hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7047
7048        dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7049        dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7050
7051        cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7052        cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7053        cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7054        cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7055        cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7056        cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7057        cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7058        cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7059
7060        /* enable CP interrupts on all rings */
7061        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7062                DRM_DEBUG("cik_irq_set: sw int gfx\n");
7063                cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7064        }
7065        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7066                struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7067                DRM_DEBUG("si_irq_set: sw int cp1\n");
7068                if (ring->me == 1) {
7069                        switch (ring->pipe) {
7070                        case 0:
7071                                cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7072                                break;
7073                        case 1:
7074                                cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7075                                break;
7076                        case 2:
7077                                cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7078                                break;
7079                        case 3:
7080                                cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7081                                break;
7082                        default:
7083                                DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7084                                break;
7085                        }
7086                } else if (ring->me == 2) {
7087                        switch (ring->pipe) {
7088                        case 0:
7089                                cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7090                                break;
7091                        case 1:
7092                                cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7093                                break;
7094                        case 2:
7095                                cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7096                                break;
7097                        case 3:
7098                                cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7099                                break;
7100                        default:
7101                                DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7102                                break;
7103                        }
7104                } else {
7105                        DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7106                }
7107        }
7108        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7109                struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7110                DRM_DEBUG("si_irq_set: sw int cp2\n");
7111                if (ring->me == 1) {
7112                        switch (ring->pipe) {
7113                        case 0:
7114                                cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7115                                break;
7116                        case 1:
7117                                cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7118                                break;
7119                        case 2:
7120                                cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7121                                break;
7122                        case 3:
7123                                cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7124                                break;
7125                        default:
7126                                DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7127                                break;
7128                        }
7129                } else if (ring->me == 2) {
7130                        switch (ring->pipe) {
7131                        case 0:
7132                                cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7133                                break;
7134                        case 1:
7135                                cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7136                                break;
7137                        case 2:
7138                                cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7139                                break;
7140                        case 3:
7141                                cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7142                                break;
7143                        default:
7144                                DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7145                                break;
7146                        }
7147                } else {
7148                        DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7149                }
7150        }
7151
7152        if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7153                DRM_DEBUG("cik_irq_set: sw int dma\n");
7154                dma_cntl |= TRAP_ENABLE;
7155        }
7156
7157        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7158                DRM_DEBUG("cik_irq_set: sw int dma1\n");
7159                dma_cntl1 |= TRAP_ENABLE;
7160        }
7161
7162        if (rdev->irq.crtc_vblank_int[0] ||
7163            atomic_read(&rdev->irq.pflip[0])) {
7164                DRM_DEBUG("cik_irq_set: vblank 0\n");
7165                crtc1 |= VBLANK_INTERRUPT_MASK;
7166        }
7167        if (rdev->irq.crtc_vblank_int[1] ||
7168            atomic_read(&rdev->irq.pflip[1])) {
7169                DRM_DEBUG("cik_irq_set: vblank 1\n");
7170                crtc2 |= VBLANK_INTERRUPT_MASK;
7171        }
7172        if (rdev->irq.crtc_vblank_int[2] ||
7173            atomic_read(&rdev->irq.pflip[2])) {
7174                DRM_DEBUG("cik_irq_set: vblank 2\n");
7175                crtc3 |= VBLANK_INTERRUPT_MASK;
7176        }
7177        if (rdev->irq.crtc_vblank_int[3] ||
7178            atomic_read(&rdev->irq.pflip[3])) {
7179                DRM_DEBUG("cik_irq_set: vblank 3\n");
7180                crtc4 |= VBLANK_INTERRUPT_MASK;
7181        }
7182        if (rdev->irq.crtc_vblank_int[4] ||
7183            atomic_read(&rdev->irq.pflip[4])) {
7184                DRM_DEBUG("cik_irq_set: vblank 4\n");
7185                crtc5 |= VBLANK_INTERRUPT_MASK;
7186        }
7187        if (rdev->irq.crtc_vblank_int[5] ||
7188            atomic_read(&rdev->irq.pflip[5])) {
7189                DRM_DEBUG("cik_irq_set: vblank 5\n");
7190                crtc6 |= VBLANK_INTERRUPT_MASK;
7191        }
7192        if (rdev->irq.hpd[0]) {
7193                DRM_DEBUG("cik_irq_set: hpd 1\n");
7194                hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7195        }
7196        if (rdev->irq.hpd[1]) {
7197                DRM_DEBUG("cik_irq_set: hpd 2\n");
7198                hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7199        }
7200        if (rdev->irq.hpd[2]) {
7201                DRM_DEBUG("cik_irq_set: hpd 3\n");
7202                hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7203        }
7204        if (rdev->irq.hpd[3]) {
7205                DRM_DEBUG("cik_irq_set: hpd 4\n");
7206                hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7207        }
7208        if (rdev->irq.hpd[4]) {
7209                DRM_DEBUG("cik_irq_set: hpd 5\n");
7210                hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7211        }
7212        if (rdev->irq.hpd[5]) {
7213                DRM_DEBUG("cik_irq_set: hpd 6\n");
7214                hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7215        }
7216
7217        WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7218
7219        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7220        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7221
7222        WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7223        WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7224        WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7225        WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7226        WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7227        WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7228        WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7229        WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7230
7231        WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7232
7233        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7234        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7235        if (rdev->num_crtc >= 4) {
7236                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7237                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7238        }
7239        if (rdev->num_crtc >= 6) {
7240                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7241                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7242        }
7243
7244        if (rdev->num_crtc >= 2) {
7245                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246                       GRPH_PFLIP_INT_MASK);
7247                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7248                       GRPH_PFLIP_INT_MASK);
7249        }
7250        if (rdev->num_crtc >= 4) {
7251                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7252                       GRPH_PFLIP_INT_MASK);
7253                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7254                       GRPH_PFLIP_INT_MASK);
7255        }
7256        if (rdev->num_crtc >= 6) {
7257                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258                       GRPH_PFLIP_INT_MASK);
7259                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260                       GRPH_PFLIP_INT_MASK);
7261        }
7262
7263        WREG32(DC_HPD1_INT_CONTROL, hpd1);
7264        WREG32(DC_HPD2_INT_CONTROL, hpd2);
7265        WREG32(DC_HPD3_INT_CONTROL, hpd3);
7266        WREG32(DC_HPD4_INT_CONTROL, hpd4);
7267        WREG32(DC_HPD5_INT_CONTROL, hpd5);
7268        WREG32(DC_HPD6_INT_CONTROL, hpd6);
7269
7270        /* posting read */
7271        RREG32(SRBM_STATUS);
7272
7273        return 0;
7274}
7275
7276/**
7277 * cik_irq_ack - ack interrupt sources
7278 *
7279 * @rdev: radeon_device pointer
7280 *
7281 * Ack interrupt sources on the GPU (vblanks, hpd,
7282 * etc.) (CIK).  Certain interrupts sources are sw
7283 * generated and do not require an explicit ack.
7284 */
7285static inline void cik_irq_ack(struct radeon_device *rdev)
7286{
7287        u32 tmp;
7288
7289        rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7290        rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7291        rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7292        rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7293        rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7294        rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7295        rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7296
7297        rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7298                EVERGREEN_CRTC0_REGISTER_OFFSET);
7299        rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7300                EVERGREEN_CRTC1_REGISTER_OFFSET);
7301        if (rdev->num_crtc >= 4) {
7302                rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7303                        EVERGREEN_CRTC2_REGISTER_OFFSET);
7304                rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7305                        EVERGREEN_CRTC3_REGISTER_OFFSET);
7306        }
7307        if (rdev->num_crtc >= 6) {
7308                rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7309                        EVERGREEN_CRTC4_REGISTER_OFFSET);
7310                rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7311                        EVERGREEN_CRTC5_REGISTER_OFFSET);
7312        }
7313
7314        if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7315                WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7316                       GRPH_PFLIP_INT_CLEAR);
7317        if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7318                WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7319                       GRPH_PFLIP_INT_CLEAR);
7320        if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7321                WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7322        if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7323                WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7324        if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7325                WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7326        if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7327                WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7328
7329        if (rdev->num_crtc >= 4) {
7330                if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7331                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7332                               GRPH_PFLIP_INT_CLEAR);
7333                if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7334                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7335                               GRPH_PFLIP_INT_CLEAR);
7336                if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7337                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7338                if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7339                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7340                if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7341                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7342                if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7343                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7344        }
7345
7346        if (rdev->num_crtc >= 6) {
7347                if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7348                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7349                               GRPH_PFLIP_INT_CLEAR);
7350                if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7351                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7352                               GRPH_PFLIP_INT_CLEAR);
7353                if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7354                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7355                if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7356                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7357                if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7358                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7359                if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7360                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7361        }
7362
7363        if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7364                tmp = RREG32(DC_HPD1_INT_CONTROL);
7365                tmp |= DC_HPDx_INT_ACK;
7366                WREG32(DC_HPD1_INT_CONTROL, tmp);
7367        }
7368        if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7369                tmp = RREG32(DC_HPD2_INT_CONTROL);
7370                tmp |= DC_HPDx_INT_ACK;
7371                WREG32(DC_HPD2_INT_CONTROL, tmp);
7372        }
7373        if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7374                tmp = RREG32(DC_HPD3_INT_CONTROL);
7375                tmp |= DC_HPDx_INT_ACK;
7376                WREG32(DC_HPD3_INT_CONTROL, tmp);
7377        }
7378        if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7379                tmp = RREG32(DC_HPD4_INT_CONTROL);
7380                tmp |= DC_HPDx_INT_ACK;
7381                WREG32(DC_HPD4_INT_CONTROL, tmp);
7382        }
7383        if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7384                tmp = RREG32(DC_HPD5_INT_CONTROL);
7385                tmp |= DC_HPDx_INT_ACK;
7386                WREG32(DC_HPD5_INT_CONTROL, tmp);
7387        }
7388        if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7389                tmp = RREG32(DC_HPD6_INT_CONTROL);
7390                tmp |= DC_HPDx_INT_ACK;
7391                WREG32(DC_HPD6_INT_CONTROL, tmp);
7392        }
7393        if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7394                tmp = RREG32(DC_HPD1_INT_CONTROL);
7395                tmp |= DC_HPDx_RX_INT_ACK;
7396                WREG32(DC_HPD1_INT_CONTROL, tmp);
7397        }
7398        if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7399                tmp = RREG32(DC_HPD2_INT_CONTROL);
7400                tmp |= DC_HPDx_RX_INT_ACK;
7401                WREG32(DC_HPD2_INT_CONTROL, tmp);
7402        }
7403        if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7404                tmp = RREG32(DC_HPD3_INT_CONTROL);
7405                tmp |= DC_HPDx_RX_INT_ACK;
7406                WREG32(DC_HPD3_INT_CONTROL, tmp);
7407        }
7408        if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7409                tmp = RREG32(DC_HPD4_INT_CONTROL);
7410                tmp |= DC_HPDx_RX_INT_ACK;
7411                WREG32(DC_HPD4_INT_CONTROL, tmp);
7412        }
7413        if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7414                tmp = RREG32(DC_HPD5_INT_CONTROL);
7415                tmp |= DC_HPDx_RX_INT_ACK;
7416                WREG32(DC_HPD5_INT_CONTROL, tmp);
7417        }
7418        if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7419                tmp = RREG32(DC_HPD6_INT_CONTROL);
7420                tmp |= DC_HPDx_RX_INT_ACK;
7421                WREG32(DC_HPD6_INT_CONTROL, tmp);
7422        }
7423}
7424
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable the IH ring, ack whatever is still pending after a
 * short delay and clear all interrupt source enables (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7440
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Run cik_irq_disable() and then stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts first, then the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7454
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Run the suspend path (interrupts off, RLC stopped) and then
 * free the IH ring buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before the ring goes away */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7469
7470/**
7471 * cik_get_ih_wptr - get the IH ring buffer wptr
7472 *
7473 * @rdev: radeon_device pointer
7474 *
7475 * Get the IH ring buffer wptr from either the register
7476 * or the writeback memory buffer (CIK).  Also check for
7477 * ring buffer overflow and deal with it.
7478 * Used by cik_irq_process().
7479 * Returns the value of the wptr.
7480 */
7481static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7482{
7483        u32 wptr, tmp;
7484
7485        if (rdev->wb.enabled)
7486                wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7487        else
7488                wptr = RREG32(IH_RB_WPTR);
7489
7490        if (wptr & RB_OVERFLOW) {
7491                wptr &= ~RB_OVERFLOW;
7492                /* When a ring buffer overflow happen start parsing interrupt
7493                 * from the last not overwritten vector (wptr + 16). Hopefully
7494                 * this should allow us to catchup.
7495                 */
7496                dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7497                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7498                rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7499                tmp = RREG32(IH_RB_CNTL);
7500                tmp |= IH_WPTR_OVERFLOW_CLEAR;
7501                WREG32(IH_RB_CNTL, tmp);
7502        }
7503        return (wptr & rdev->ih.ptr_mask);
7504}
7505
7506/*        CIK IV Ring
7507 * Each IV ring entry is 128 bits:
7508 * [7:0]    - interrupt source id
7509 * [31:8]   - reserved
7510 * [59:32]  - interrupt source data
7511 * [63:60]  - reserved
7512 * [71:64]  - RINGID
7513 *            CP:
7514 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7515 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7516 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7517 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7518 *            PIPE_ID - ME0 0=3D
7519 *                    - ME1&2 compute dispatcher (4 pipes each)
7520 *            SDMA:
7521 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7522 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7523 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7524 * [79:72]  - VMID
7525 * [95:80]  - PASID
7526 * [127:96] - reserved
7527 */
7528/**
7529 * cik_irq_process - interrupt handler
7530 *
7531 * @rdev: radeon_device pointer
7532 *
7533 * Interrupt hander (CIK).  Walk the IH ring,
7534 * ack interrupts and schedule work to handle
7535 * interrupt events.
7536 * Returns irq process return code.
7537 */
7538int cik_irq_process(struct radeon_device *rdev)
7539{
7540        struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7541        struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7542        u32 wptr;
7543        u32 rptr;
7544        u32 src_id, src_data, ring_id;
7545        u8 me_id, pipe_id, queue_id;
7546        u32 ring_index;
7547        bool queue_hotplug = false;
7548        bool queue_dp = false;
7549        bool queue_reset = false;
7550        u32 addr, status, mc_client;
7551        bool queue_thermal = false;
7552
7553        if (!rdev->ih.enabled || rdev->shutdown)
7554                return IRQ_NONE;
7555
7556        wptr = cik_get_ih_wptr(rdev);
7557
7558restart_ih:
7559        /* is somebody else already processing irqs? */
7560        if (atomic_xchg(&rdev->ih.lock, 1))
7561                return IRQ_NONE;
7562
7563        rptr = rdev->ih.rptr;
7564        DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7565
7566        /* Order reading of wptr vs. reading of IH ring data */
7567        rmb();
7568
7569        /* display interrupts */
7570        cik_irq_ack(rdev);
7571
7572        while (rptr != wptr) {
7573                /* wptr/rptr are in bytes! */
7574                ring_index = rptr / 4;
7575
7576                src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7577                src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7578                ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7579
7580                switch (src_id) {
7581                case 1: /* D1 vblank/vline */
7582                        switch (src_data) {
7583                        case 0: /* D1 vblank */
7584                                if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7585                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7586
7587                                if (rdev->irq.crtc_vblank_int[0]) {
7588                                        drm_handle_vblank(rdev->ddev, 0);
7589                                        rdev->pm.vblank_sync = true;
7590                                        wake_up(&rdev->irq.vblank_queue);
7591                                }
7592                                if (atomic_read(&rdev->irq.pflip[0]))
7593                                        radeon_crtc_handle_vblank(rdev, 0);
7594                                rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7595                                DRM_DEBUG("IH: D1 vblank\n");
7596
7597                                break;
7598                        case 1: /* D1 vline */
7599                                if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7600                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602                                rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7603                                DRM_DEBUG("IH: D1 vline\n");
7604
7605                                break;
7606                        default:
7607                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7608                                break;
7609                        }
7610                        break;
7611                case 2: /* D2 vblank/vline */
7612                        switch (src_data) {
7613                        case 0: /* D2 vblank */
7614                                if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7615                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617                                if (rdev->irq.crtc_vblank_int[1]) {
7618                                        drm_handle_vblank(rdev->ddev, 1);
7619                                        rdev->pm.vblank_sync = true;
7620                                        wake_up(&rdev->irq.vblank_queue);
7621                                }
7622                                if (atomic_read(&rdev->irq.pflip[1]))
7623                                        radeon_crtc_handle_vblank(rdev, 1);
7624                                rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7625                                DRM_DEBUG("IH: D2 vblank\n");
7626
7627                                break;
7628                        case 1: /* D2 vline */
7629                                if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7630                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632                                rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7633                                DRM_DEBUG("IH: D2 vline\n");
7634
7635                                break;
7636                        default:
7637                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7638                                break;
7639                        }
7640                        break;
7641                case 3: /* D3 vblank/vline */
7642                        switch (src_data) {
7643                        case 0: /* D3 vblank */
7644                                if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7645                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647                                if (rdev->irq.crtc_vblank_int[2]) {
7648                                        drm_handle_vblank(rdev->ddev, 2);
7649                                        rdev->pm.vblank_sync = true;
7650                                        wake_up(&rdev->irq.vblank_queue);
7651                                }
7652                                if (atomic_read(&rdev->irq.pflip[2]))
7653                                        radeon_crtc_handle_vblank(rdev, 2);
7654                                rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7655                                DRM_DEBUG("IH: D3 vblank\n");
7656
7657                                break;
7658                        case 1: /* D3 vline */
7659                                if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7660                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662                                rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7663                                DRM_DEBUG("IH: D3 vline\n");
7664
7665                                break;
7666                        default:
7667                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7668                                break;
7669                        }
7670                        break;
7671                case 4: /* D4 vblank/vline */
7672                        switch (src_data) {
7673                        case 0: /* D4 vblank */
7674                                if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7675                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677                                if (rdev->irq.crtc_vblank_int[3]) {
7678                                        drm_handle_vblank(rdev->ddev, 3);
7679                                        rdev->pm.vblank_sync = true;
7680                                        wake_up(&rdev->irq.vblank_queue);
7681                                }
7682                                if (atomic_read(&rdev->irq.pflip[3]))
7683                                        radeon_crtc_handle_vblank(rdev, 3);
7684                                rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7685                                DRM_DEBUG("IH: D4 vblank\n");
7686
7687                                break;
7688                        case 1: /* D4 vline */
7689                                if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7690                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692                                rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7693                                DRM_DEBUG("IH: D4 vline\n");
7694
7695                                break;
7696                        default:
7697                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7698                                break;
7699                        }
7700                        break;
7701                case 5: /* D5 vblank/vline */
7702                        switch (src_data) {
7703                        case 0: /* D5 vblank */
7704                                if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7705                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707                                if (rdev->irq.crtc_vblank_int[4]) {
7708                                        drm_handle_vblank(rdev->ddev, 4);
7709                                        rdev->pm.vblank_sync = true;
7710                                        wake_up(&rdev->irq.vblank_queue);
7711                                }
7712                                if (atomic_read(&rdev->irq.pflip[4]))
7713                                        radeon_crtc_handle_vblank(rdev, 4);
7714                                rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7715                                DRM_DEBUG("IH: D5 vblank\n");
7716
7717                                break;
7718                        case 1: /* D5 vline */
7719                                if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7720                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722                                rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7723                                DRM_DEBUG("IH: D5 vline\n");
7724
7725                                break;
7726                        default:
7727                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7728                                break;
7729                        }
7730                        break;
7731                case 6: /* D6 vblank/vline */
7732                        switch (src_data) {
7733                        case 0: /* D6 vblank */
7734                                if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7735                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737                                if (rdev->irq.crtc_vblank_int[5]) {
7738                                        drm_handle_vblank(rdev->ddev, 5);
7739                                        rdev->pm.vblank_sync = true;
7740                                        wake_up(&rdev->irq.vblank_queue);
7741                                }
7742                                if (atomic_read(&rdev->irq.pflip[5]))
7743                                        radeon_crtc_handle_vblank(rdev, 5);
7744                                rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7745                                DRM_DEBUG("IH: D6 vblank\n");
7746
7747                                break;
7748                        case 1: /* D6 vline */
7749                                if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7750                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752                                rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7753                                DRM_DEBUG("IH: D6 vline\n");
7754
7755                                break;
7756                        default:
7757                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7758                                break;
7759                        }
7760                        break;
7761                case 8: /* D1 page flip */
7762                case 10: /* D2 page flip */
7763                case 12: /* D3 page flip */
7764                case 14: /* D4 page flip */
7765                case 16: /* D5 page flip */
7766                case 18: /* D6 page flip */
7767                        DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7768                        if (radeon_use_pflipirq > 0)
7769                                radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7770                        break;
7771                case 42: /* HPD hotplug */
7772                        switch (src_data) {
7773                        case 0:
7774                                if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7775                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7776
7777                                rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7778                                queue_hotplug = true;
7779                                DRM_DEBUG("IH: HPD1\n");
7780
7781                                break;
7782                        case 1:
7783                                if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7784                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7785
7786                                rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7787                                queue_hotplug = true;
7788                                DRM_DEBUG("IH: HPD2\n");
7789
7790                                break;
7791                        case 2:
7792                                if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7793                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794
7795                                rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7796                                queue_hotplug = true;
7797                                DRM_DEBUG("IH: HPD3\n");
7798
7799                                break;
7800                        case 3:
7801                                if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7802                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803
7804                                rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7805                                queue_hotplug = true;
7806                                DRM_DEBUG("IH: HPD4\n");
7807
7808                                break;
7809                        case 4:
7810                                if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7811                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812
7813                                rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7814                                queue_hotplug = true;
7815                                DRM_DEBUG("IH: HPD5\n");
7816
7817                                break;
7818                        case 5:
7819                                if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7820                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821
7822                                rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7823                                queue_hotplug = true;
7824                                DRM_DEBUG("IH: HPD6\n");
7825
7826                                break;
7827                        case 6:
7828                                if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7829                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830
7831                                rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7832                                queue_dp = true;
7833                                DRM_DEBUG("IH: HPD_RX 1\n");
7834
7835                                break;
7836                        case 7:
7837                                if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7838                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839
7840                                rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7841                                queue_dp = true;
7842                                DRM_DEBUG("IH: HPD_RX 2\n");
7843
7844                                break;
7845                        case 8:
7846                                if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7847                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848
7849                                rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7850                                queue_dp = true;
7851                                DRM_DEBUG("IH: HPD_RX 3\n");
7852
7853                                break;
7854                        case 9:
7855                                if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7856                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857
7858                                rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7859                                queue_dp = true;
7860                                DRM_DEBUG("IH: HPD_RX 4\n");
7861
7862                                break;
7863                        case 10:
7864                                if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7865                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866
7867                                rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7868                                queue_dp = true;
7869                                DRM_DEBUG("IH: HPD_RX 5\n");
7870
7871                                break;
7872                        case 11:
7873                                if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7874                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875
7876                                rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7877                                queue_dp = true;
7878                                DRM_DEBUG("IH: HPD_RX 6\n");
7879
7880                                break;
7881                        default:
7882                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7883                                break;
7884                        }
7885                        break;
7886                case 96:
7887                        DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7888                        WREG32(SRBM_INT_ACK, 0x1);
7889                        break;
7890                case 124: /* UVD */
7891                        DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7892                        radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7893                        break;
7894                case 146:
7895                case 147:
7896                        addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7897                        status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7898                        mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7899                        /* reset addr and status */
7900                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7901                        if (addr == 0x0 && status == 0x0)
7902                                break;
7903                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7904                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7905                                addr);
7906                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7907                                status);
7908                        cik_vm_decode_fault(rdev, status, addr, mc_client);
7909                        break;
7910                case 167: /* VCE */
7911                        DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7912                        switch (src_data) {
7913                        case 0:
7914                                radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7915                                break;
7916                        case 1:
7917                                radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7918                                break;
7919                        default:
7920                                DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7921                                break;
7922                        }
7923                        break;
7924                case 176: /* GFX RB CP_INT */
7925                case 177: /* GFX IB CP_INT */
7926                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927                        break;
7928                case 181: /* CP EOP event */
7929                        DRM_DEBUG("IH: CP EOP\n");
7930                        /* XXX check the bitfield order! */
7931                        me_id = (ring_id & 0x60) >> 5;
7932                        pipe_id = (ring_id & 0x18) >> 3;
7933                        queue_id = (ring_id & 0x7) >> 0;
7934                        switch (me_id) {
7935                        case 0:
7936                                radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7937                                break;
7938                        case 1:
7939                        case 2:
7940                                if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7941                                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7942                                if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7943                                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7944                                break;
7945                        }
7946                        break;
7947                case 184: /* CP Privileged reg access */
7948                        DRM_ERROR("Illegal register access in command stream\n");
7949                        /* XXX check the bitfield order! */
7950                        me_id = (ring_id & 0x60) >> 5;
7951                        switch (me_id) {
7952                        case 0:
7953                                /* This results in a full GPU reset, but all we need to do is soft
7954                                 * reset the CP for gfx
7955                                 */
7956                                queue_reset = true;
7957                                break;
7958                        case 1:
7959                                /* XXX compute */
7960                                queue_reset = true;
7961                                break;
7962                        case 2:
7963                                /* XXX compute */
7964                                queue_reset = true;
7965                                break;
7966                        }
7967                        break;
7968                case 185: /* CP Privileged inst */
7969                        DRM_ERROR("Illegal instruction in command stream\n");
7970                        /* XXX check the bitfield order! */
7971                        me_id = (ring_id & 0x60) >> 5;
7972                        switch (me_id) {
7973                        case 0:
7974                                /* This results in a full GPU reset, but all we need to do is soft
7975                                 * reset the CP for gfx
7976                                 */
7977                                queue_reset = true;
7978                                break;
7979                        case 1:
7980                                /* XXX compute */
7981                                queue_reset = true;
7982                                break;
7983                        case 2:
7984                                /* XXX compute */
7985                                queue_reset = true;
7986                                break;
7987                        }
7988                        break;
7989                case 224: /* SDMA trap event */
7990                        /* XXX check the bitfield order! */
7991                        me_id = (ring_id & 0x3) >> 0;
7992                        queue_id = (ring_id & 0xc) >> 2;
7993                        DRM_DEBUG("IH: SDMA trap\n");
7994                        switch (me_id) {
7995                        case 0:
7996                                switch (queue_id) {
7997                                case 0:
7998                                        radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7999                                        break;
8000                                case 1:
8001                                        /* XXX compute */
8002                                        break;
8003                                case 2:
8004                                        /* XXX compute */
8005                                        break;
8006                                }
8007                                break;
8008                        case 1:
8009                                switch (queue_id) {
8010                                case 0:
8011                                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8012                                        break;
8013                                case 1:
8014                                        /* XXX compute */
8015                                        break;
8016                                case 2:
8017                                        /* XXX compute */
8018                                        break;
8019                                }
8020                                break;
8021                        }
8022                        break;
8023                case 230: /* thermal low to high */
8024                        DRM_DEBUG("IH: thermal low to high\n");
8025                        rdev->pm.dpm.thermal.high_to_low = false;
8026                        queue_thermal = true;
8027                        break;
8028                case 231: /* thermal high to low */
8029                        DRM_DEBUG("IH: thermal high to low\n");
8030                        rdev->pm.dpm.thermal.high_to_low = true;
8031                        queue_thermal = true;
8032                        break;
8033                case 233: /* GUI IDLE */
8034                        DRM_DEBUG("IH: GUI idle\n");
8035                        break;
8036                case 241: /* SDMA Privileged inst */
8037                case 247: /* SDMA Privileged inst */
8038                        DRM_ERROR("Illegal instruction in SDMA command stream\n");
8039                        /* XXX check the bitfield order! */
8040                        me_id = (ring_id & 0x3) >> 0;
8041                        queue_id = (ring_id & 0xc) >> 2;
8042                        switch (me_id) {
8043                        case 0:
8044                                switch (queue_id) {
8045                                case 0:
8046                                        queue_reset = true;
8047                                        break;
8048                                case 1:
8049                                        /* XXX compute */
8050                                        queue_reset = true;
8051                                        break;
8052                                case 2:
8053                                        /* XXX compute */
8054                                        queue_reset = true;
8055                                        break;
8056                                }
8057                                break;
8058                        case 1:
8059                                switch (queue_id) {
8060                                case 0:
8061                                        queue_reset = true;
8062                                        break;
8063                                case 1:
8064                                        /* XXX compute */
8065                                        queue_reset = true;
8066                                        break;
8067                                case 2:
8068                                        /* XXX compute */
8069                                        queue_reset = true;
8070                                        break;
8071                                }
8072                                break;
8073                        }
8074                        break;
8075                default:
8076                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8077                        break;
8078                }
8079
8080                /* wptr/rptr are in bytes! */
8081                rptr += 16;
8082                rptr &= rdev->ih.ptr_mask;
8083                WREG32(IH_RB_RPTR, rptr);
8084        }
8085        if (queue_dp)
8086                schedule_work(&rdev->dp_work);
8087        if (queue_hotplug)
8088                schedule_delayed_work(&rdev->hotplug_work, 0);
8089        if (queue_reset) {
8090                rdev->needs_reset = true;
8091                wake_up_all(&rdev->fence_queue);
8092        }
8093        if (queue_thermal)
8094                schedule_work(&rdev->pm.dpm.thermal.work);
8095        rdev->ih.rptr = rptr;
8096        atomic_set(&rdev->ih.lock, 0);
8097
8098        /* make sure wptr hasn't changed while processing */
8099        wptr = cik_get_ih_wptr(rdev);
8100        if (wptr != rptr)
8101                goto restart_ih;
8102
8103        return IRQ_HANDLED;
8104}
8105
8106/*
8107 * startup/shutdown callbacks
8108 */
8109static void cik_uvd_init(struct radeon_device *rdev)
8110{
8111        int r;
8112
8113        if (!rdev->has_uvd)
8114                return;
8115
8116        r = radeon_uvd_init(rdev);
8117        if (r) {
8118                dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8119                /*
8120                 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8121                 * to early fails cik_uvd_start() and thus nothing happens
8122                 * there. So it is pointless to try to go through that code
8123                 * hence why we disable uvd here.
8124                 */
8125                rdev->has_uvd = false;
8126                return;
8127        }
8128        rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8129        r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8130}
8131
8132static void cik_uvd_start(struct radeon_device *rdev)
8133{
8134        int r;
8135
8136        if (!rdev->has_uvd)
8137                return;
8138
8139        r = radeon_uvd_resume(rdev);
8140        if (r) {
8141                dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8142                goto error;
8143        }
8144        r = uvd_v4_2_resume(rdev);
8145        if (r) {
8146                dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8147                goto error;
8148        }
8149        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8150        if (r) {
8151                dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8152                goto error;
8153        }
8154        return;
8155
8156error:
8157        rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8158}
8159
8160static void cik_uvd_resume(struct radeon_device *rdev)
8161{
8162        struct radeon_ring *ring;
8163        int r;
8164
8165        if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8166                return;
8167
8168        ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8169        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8170        if (r) {
8171                dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8172                return;
8173        }
8174        r = uvd_v1_0_init(rdev);
8175        if (r) {
8176                dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8177                return;
8178        }
8179}
8180
8181static void cik_vce_init(struct radeon_device *rdev)
8182{
8183        int r;
8184
8185        if (!rdev->has_vce)
8186                return;
8187
8188        r = radeon_vce_init(rdev);
8189        if (r) {
8190                dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8191                /*
8192                 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8193                 * to early fails cik_vce_start() and thus nothing happens
8194                 * there. So it is pointless to try to go through that code
8195                 * hence why we disable vce here.
8196                 */
8197                rdev->has_vce = false;
8198                return;
8199        }
8200        rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8201        r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8202        rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8203        r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8204}
8205
8206static void cik_vce_start(struct radeon_device *rdev)
8207{
8208        int r;
8209
8210        if (!rdev->has_vce)
8211                return;
8212
8213        r = radeon_vce_resume(rdev);
8214        if (r) {
8215                dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8216                goto error;
8217        }
8218        r = vce_v2_0_resume(rdev);
8219        if (r) {
8220                dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8221                goto error;
8222        }
8223        r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8224        if (r) {
8225                dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8226                goto error;
8227        }
8228        r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8229        if (r) {
8230                dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8231                goto error;
8232        }
8233        return;
8234
8235error:
8236        rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8237        rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8238}
8239
8240static void cik_vce_resume(struct radeon_device *rdev)
8241{
8242        struct radeon_ring *ring;
8243        int r;
8244
8245        if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8246                return;
8247
8248        ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8249        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8250        if (r) {
8251                dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8252                return;
8253        }
8254        ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8255        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8256        if (r) {
8257                dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8258                return;
8259        }
8260        r = vce_v1_0_init(rdev);
8261        if (r) {
8262                dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8263                return;
8264        }
8265}
8266
8267/**
8268 * cik_startup - program the asic to a functional state
8269 *
8270 * @rdev: radeon_device pointer
8271 *
8272 * Programs the asic to a functional state (CIK).
8273 * Called by cik_init() and cik_resume().
8274 * Returns 0 for success, error for failure.
8275 */
8276static int cik_startup(struct radeon_device *rdev)
8277{
8278        struct radeon_ring *ring;
8279        u32 nop;
8280        int r;
8281
8282        /* enable pcie gen2/3 link */
8283        cik_pcie_gen3_enable(rdev);
8284        /* enable aspm */
8285        cik_program_aspm(rdev);
8286
8287        /* scratch needs to be initialized before MC */
8288        r = r600_vram_scratch_init(rdev);
8289        if (r)
8290                return r;
8291
8292        cik_mc_program(rdev);
8293
8294        if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8295                r = ci_mc_load_microcode(rdev);
8296                if (r) {
8297                        DRM_ERROR("Failed to load MC firmware!\n");
8298                        return r;
8299                }
8300        }
8301
8302        r = cik_pcie_gart_enable(rdev);
8303        if (r)
8304                return r;
8305        cik_gpu_init(rdev);
8306
8307        /* allocate rlc buffers */
8308        if (rdev->flags & RADEON_IS_IGP) {
8309                if (rdev->family == CHIP_KAVERI) {
8310                        rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8311                        rdev->rlc.reg_list_size =
8312                                (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8313                } else {
8314                        rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8315                        rdev->rlc.reg_list_size =
8316                                (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8317                }
8318        }
8319        rdev->rlc.cs_data = ci_cs_data;
8320        rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8321        rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8322        r = sumo_rlc_init(rdev);
8323        if (r) {
8324                DRM_ERROR("Failed to init rlc BOs!\n");
8325                return r;
8326        }
8327
8328        /* allocate wb buffer */
8329        r = radeon_wb_init(rdev);
8330        if (r)
8331                return r;
8332
8333        /* allocate mec buffers */
8334        r = cik_mec_init(rdev);
8335        if (r) {
8336                DRM_ERROR("Failed to init MEC BOs!\n");
8337                return r;
8338        }
8339
8340        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8341        if (r) {
8342                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8343                return r;
8344        }
8345
8346        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8347        if (r) {
8348                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8349                return r;
8350        }
8351
8352        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8353        if (r) {
8354                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8355                return r;
8356        }
8357
8358        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8359        if (r) {
8360                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8361                return r;
8362        }
8363
8364        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8365        if (r) {
8366                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8367                return r;
8368        }
8369
8370        cik_uvd_start(rdev);
8371        cik_vce_start(rdev);
8372
8373        /* Enable IRQ */
8374        if (!rdev->irq.installed) {
8375                r = radeon_irq_kms_init(rdev);
8376                if (r)
8377                        return r;
8378        }
8379
8380        r = cik_irq_init(rdev);
8381        if (r) {
8382                DRM_ERROR("radeon: IH init failed (%d).\n", r);
8383                radeon_irq_kms_fini(rdev);
8384                return r;
8385        }
8386        cik_irq_set(rdev);
8387
8388        if (rdev->family == CHIP_HAWAII) {
8389                if (rdev->new_fw)
8390                        nop = PACKET3(PACKET3_NOP, 0x3FFF);
8391                else
8392                        nop = RADEON_CP_PACKET2;
8393        } else {
8394                nop = PACKET3(PACKET3_NOP, 0x3FFF);
8395        }
8396
8397        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8398        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8399                             nop);
8400        if (r)
8401                return r;
8402
8403        /* set up the compute queues */
8404        /* type-2 packets are deprecated on MEC, use type-3 instead */
8405        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8406        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8407                             nop);
8408        if (r)
8409                return r;
8410        ring->me = 1; /* first MEC */
8411        ring->pipe = 0; /* first pipe */
8412        ring->queue = 0; /* first queue */
8413        ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8414
8415        /* type-2 packets are deprecated on MEC, use type-3 instead */
8416        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8417        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8418                             nop);
8419        if (r)
8420                return r;
8421        /* dGPU only have 1 MEC */
8422        ring->me = 1; /* first MEC */
8423        ring->pipe = 0; /* first pipe */
8424        ring->queue = 1; /* second queue */
8425        ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8426
8427        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8428        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8429                             SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8430        if (r)
8431                return r;
8432
8433        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8434        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8435                             SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8436        if (r)
8437                return r;
8438
8439        r = cik_cp_resume(rdev);
8440        if (r)
8441                return r;
8442
8443        r = cik_sdma_resume(rdev);
8444        if (r)
8445                return r;
8446
8447        cik_uvd_resume(rdev);
8448        cik_vce_resume(rdev);
8449
8450        r = radeon_ib_pool_init(rdev);
8451        if (r) {
8452                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8453                return r;
8454        }
8455
8456        r = radeon_vm_manager_init(rdev);
8457        if (r) {
8458                dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8459                return r;
8460        }
8461
8462        r = radeon_audio_init(rdev);
8463        if (r)
8464                return r;
8465
8466        return 0;
8467}
8468
8469/**
8470 * cik_resume - resume the asic to a functional state
8471 *
8472 * @rdev: radeon_device pointer
8473 *
8474 * Programs the asic to a functional state (CIK).
8475 * Called at resume.
8476 * Returns 0 for success, error for failure.
8477 */
8478int cik_resume(struct radeon_device *rdev)
8479{
8480        int r;
8481
8482        /* post card */
8483        atom_asic_init(rdev->mode_info.atom_context);
8484
8485        /* init golden registers */
8486        cik_init_golden_registers(rdev);
8487
8488        if (rdev->pm.pm_method == PM_METHOD_DPM)
8489                radeon_pm_resume(rdev);
8490
8491        rdev->accel_working = true;
8492        r = cik_startup(rdev);
8493        if (r) {
8494                DRM_ERROR("cik startup failed on resume\n");
8495                rdev->accel_working = false;
8496                return r;
8497        }
8498
8499        return r;
8500
8501}
8502
8503/**
8504 * cik_suspend - suspend the asic
8505 *
8506 * @rdev: radeon_device pointer
8507 *
8508 * Bring the chip into a state suitable for suspend (CIK).
8509 * Called at suspend.
8510 * Returns 0 for success.
8511 */
8512int cik_suspend(struct radeon_device *rdev)
8513{
8514        radeon_pm_suspend(rdev);
8515        radeon_audio_fini(rdev);
8516        radeon_vm_manager_fini(rdev);
8517        cik_cp_enable(rdev, false);
8518        cik_sdma_enable(rdev, false);
8519        if (rdev->has_uvd) {
8520                uvd_v1_0_fini(rdev);
8521                radeon_uvd_suspend(rdev);
8522        }
8523        if (rdev->has_vce)
8524                radeon_vce_suspend(rdev);
8525        cik_fini_pg(rdev);
8526        cik_fini_cg(rdev);
8527        cik_irq_suspend(rdev);
8528        radeon_wb_disable(rdev);
8529        cik_pcie_gart_disable(rdev);
8530        return 0;
8531}
8532
8533/* Plan is to move initialization in that function and use
8534 * helper function so that radeon_device_init pretty much
8535 * do nothing more than calling asic specific function. This
8536 * should also allow to remove a bunch of callback function
8537 * like vram_info.
8538 */
8539/**
8540 * cik_init - asic specific driver and hw init
8541 *
8542 * @rdev: radeon_device pointer
8543 *
8544 * Setup asic specific driver variables and program the hw
8545 * to a functional state (CIK).
8546 * Called at driver startup.
8547 * Returns 0 for success, errors for failure.
8548 */
8549int cik_init(struct radeon_device *rdev)
8550{
8551        struct radeon_ring *ring;
8552        int r;
8553
8554        /* Read BIOS */
8555        if (!radeon_get_bios(rdev)) {
8556                if (ASIC_IS_AVIVO(rdev))
8557                        return -EINVAL;
8558        }
8559        /* Must be an ATOMBIOS */
8560        if (!rdev->is_atom_bios) {
8561                dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8562                return -EINVAL;
8563        }
8564        r = radeon_atombios_init(rdev);
8565        if (r)
8566                return r;
8567
8568        /* Post card if necessary */
8569        if (!radeon_card_posted(rdev)) {
8570                if (!rdev->bios) {
8571                        dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8572                        return -EINVAL;
8573                }
8574                DRM_INFO("GPU not posted. posting now...\n");
8575                atom_asic_init(rdev->mode_info.atom_context);
8576        }
8577        /* init golden registers */
8578        cik_init_golden_registers(rdev);
8579        /* Initialize scratch registers */
8580        cik_scratch_init(rdev);
8581        /* Initialize surface registers */
8582        radeon_surface_init(rdev);
8583        /* Initialize clocks */
8584        radeon_get_clock_info(rdev->ddev);
8585
8586        /* Fence driver */
8587        radeon_fence_driver_init(rdev);
8588
8589        /* initialize memory controller */
8590        r = cik_mc_init(rdev);
8591        if (r)
8592                return r;
8593        /* Memory manager */
8594        r = radeon_bo_init(rdev);
8595        if (r)
8596                return r;
8597
8598        if (rdev->flags & RADEON_IS_IGP) {
8599                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8600                    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8601                        r = cik_init_microcode(rdev);
8602                        if (r) {
8603                                DRM_ERROR("Failed to load firmware!\n");
8604                                return r;
8605                        }
8606                }
8607        } else {
8608                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8609                    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8610                    !rdev->mc_fw) {
8611                        r = cik_init_microcode(rdev);
8612                        if (r) {
8613                                DRM_ERROR("Failed to load firmware!\n");
8614                                return r;
8615                        }
8616                }
8617        }
8618
8619        /* Initialize power management */
8620        radeon_pm_init(rdev);
8621
8622        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8623        ring->ring_obj = NULL;
8624        r600_ring_init(rdev, ring, 1024 * 1024);
8625
8626        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8627        ring->ring_obj = NULL;
8628        r600_ring_init(rdev, ring, 1024 * 1024);
8629        r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8630        if (r)
8631                return r;
8632
8633        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8634        ring->ring_obj = NULL;
8635        r600_ring_init(rdev, ring, 1024 * 1024);
8636        r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8637        if (r)
8638                return r;
8639
8640        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8641        ring->ring_obj = NULL;
8642        r600_ring_init(rdev, ring, 256 * 1024);
8643
8644        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8645        ring->ring_obj = NULL;
8646        r600_ring_init(rdev, ring, 256 * 1024);
8647
8648        cik_uvd_init(rdev);
8649        cik_vce_init(rdev);
8650
8651        rdev->ih.ring_obj = NULL;
8652        r600_ih_ring_init(rdev, 64 * 1024);
8653
8654        r = r600_pcie_gart_init(rdev);
8655        if (r)
8656                return r;
8657
8658        rdev->accel_working = true;
8659        r = cik_startup(rdev);
8660        if (r) {
8661                dev_err(rdev->dev, "disabling GPU acceleration\n");
8662                cik_cp_fini(rdev);
8663                cik_sdma_fini(rdev);
8664                cik_irq_fini(rdev);
8665                sumo_rlc_fini(rdev);
8666                cik_mec_fini(rdev);
8667                radeon_wb_fini(rdev);
8668                radeon_ib_pool_fini(rdev);
8669                radeon_vm_manager_fini(rdev);
8670                radeon_irq_kms_fini(rdev);
8671                cik_pcie_gart_fini(rdev);
8672                rdev->accel_working = false;
8673        }
8674
8675        /* Don't start up if the MC ucode is missing.
8676         * The default clocks and voltages before the MC ucode
8677         * is loaded are not suffient for advanced operations.
8678         */
8679        if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8680                DRM_ERROR("radeon: MC ucode required for NI+.\n");
8681                return -EINVAL;
8682        }
8683
8684        return 0;
8685}
8686
8687/**
8688 * cik_fini - asic specific driver and hw fini
8689 *
8690 * @rdev: radeon_device pointer
8691 *
8692 * Tear down the asic specific driver variables and program the hw
8693 * to an idle state (CIK).
8694 * Called at driver unload.
8695 */
8696void cik_fini(struct radeon_device *rdev)
8697{
8698        radeon_pm_fini(rdev);
8699        cik_cp_fini(rdev);
8700        cik_sdma_fini(rdev);
8701        cik_fini_pg(rdev);
8702        cik_fini_cg(rdev);
8703        cik_irq_fini(rdev);
8704        sumo_rlc_fini(rdev);
8705        cik_mec_fini(rdev);
8706        radeon_wb_fini(rdev);
8707        radeon_vm_manager_fini(rdev);
8708        radeon_ib_pool_fini(rdev);
8709        radeon_irq_kms_fini(rdev);
8710        uvd_v1_0_fini(rdev);
8711        radeon_uvd_fini(rdev);
8712        radeon_vce_fini(rdev);
8713        cik_pcie_gart_fini(rdev);
8714        r600_vram_scratch_fini(rdev);
8715        radeon_gem_fini(rdev);
8716        radeon_fence_driver_fini(rdev);
8717        radeon_bo_fini(rdev);
8718        radeon_atombios_fini(rdev);
8719        kfree(rdev->bios);
8720        rdev->bios = NULL;
8721}
8722
8723void dce8_program_fmt(struct drm_encoder *encoder)
8724{
8725        struct drm_device *dev = encoder->dev;
8726        struct radeon_device *rdev = dev->dev_private;
8727        struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8728        struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8729        struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8730        int bpc = 0;
8731        u32 tmp = 0;
8732        enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8733
8734        if (connector) {
8735                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8736                bpc = radeon_get_monitor_bpc(connector);
8737                dither = radeon_connector->dither;
8738        }
8739
8740        /* LVDS/eDP FMT is set up by atom */
8741        if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8742                return;
8743
8744        /* not needed for analog */
8745        if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8746            (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8747                return;
8748
8749        if (bpc == 0)
8750                return;
8751
8752        switch (bpc) {
8753        case 6:
8754                if (dither == RADEON_FMT_DITHER_ENABLE)
8755                        /* XXX sort out optimal dither settings */
8756                        tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8757                                FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8758                else
8759                        tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8760                break;
8761        case 8:
8762                if (dither == RADEON_FMT_DITHER_ENABLE)
8763                        /* XXX sort out optimal dither settings */
8764                        tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8765                                FMT_RGB_RANDOM_ENABLE |
8766                                FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8767                else
8768                        tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8769                break;
8770        case 10:
8771                if (dither == RADEON_FMT_DITHER_ENABLE)
8772                        /* XXX sort out optimal dither settings */
8773                        tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8774                                FMT_RGB_RANDOM_ENABLE |
8775                                FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8776                else
8777                        tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8778                break;
8779        default:
8780                /* not needed */
8781                break;
8782        }
8783
8784        WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8785}
8786
8787/* display watermark setup */
8788/**
8789 * dce8_line_buffer_adjust - Set up the line buffer
8790 *
8791 * @rdev: radeon_device pointer
8792 * @radeon_crtc: the selected display controller
8793 * @mode: the current display mode on the selected display
8794 * controller
8795 *
8796 * Setup up the line buffer allocation for
8797 * the selected display controller (CIK).
8798 * Returns the line buffer size in pixels.
8799 */
8800static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8801                                   struct radeon_crtc *radeon_crtc,
8802                                   struct drm_display_mode *mode)
8803{
8804        u32 tmp, buffer_alloc, i;
8805        u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8806        /*
8807         * Line Buffer Setup
8808         * There are 6 line buffers, one for each display controllers.
8809         * There are 3 partitions per LB. Select the number of partitions
8810         * to enable based on the display width.  For display widths larger
8811         * than 4096, you need use to use 2 display controllers and combine
8812         * them using the stereo blender.
8813         */
8814        if (radeon_crtc->base.enabled && mode) {
8815                if (mode->crtc_hdisplay < 1920) {
8816                        tmp = 1;
8817                        buffer_alloc = 2;
8818                } else if (mode->crtc_hdisplay < 2560) {
8819                        tmp = 2;
8820                        buffer_alloc = 2;
8821                } else if (mode->crtc_hdisplay < 4096) {
8822                        tmp = 0;
8823                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8824                } else {
8825                        DRM_DEBUG_KMS("Mode too big for LB!\n");
8826                        tmp = 0;
8827                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8828                }
8829        } else {
8830                tmp = 1;
8831                buffer_alloc = 0;
8832        }
8833
8834        WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8835               LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8836
8837        WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8838               DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8839        for (i = 0; i < rdev->usec_timeout; i++) {
8840                if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8841                    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8842                        break;
8843                udelay(1);
8844        }
8845
8846        if (radeon_crtc->base.enabled && mode) {
8847                switch (tmp) {
8848                case 0:
8849                default:
8850                        return 4096 * 2;
8851                case 1:
8852                        return 1920 * 2;
8853                case 2:
8854                        return 2560 * 2;
8855                }
8856        }
8857
8858        /* controller not enabled, so no lb used */
8859        return 0;
8860}
8861
8862/**
8863 * cik_get_number_of_dram_channels - get the number of dram channels
8864 *
8865 * @rdev: radeon_device pointer
8866 *
8867 * Look up the number of video ram channels (CIK).
8868 * Used for display watermark bandwidth calculations
8869 * Returns the number of dram channels
8870 */
8871static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8872{
8873        u32 tmp = RREG32(MC_SHARED_CHMAP);
8874
8875        switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8876        case 0:
8877        default:
8878                return 1;
8879        case 1:
8880                return 2;
8881        case 2:
8882                return 4;
8883        case 3:
8884                return 8;
8885        case 4:
8886                return 3;
8887        case 5:
8888                return 6;
8889        case 6:
8890                return 10;
8891        case 7:
8892                return 12;
8893        case 8:
8894                return 16;
8895        }
8896}
8897
8898struct dce8_wm_params {
8899        u32 dram_channels; /* number of dram channels */
8900        u32 yclk;          /* bandwidth per dram data pin in kHz */
8901        u32 sclk;          /* engine clock in kHz */
8902        u32 disp_clk;      /* display clock in kHz */
8903        u32 src_width;     /* viewport width */
8904        u32 active_time;   /* active display time in ns */
8905        u32 blank_time;    /* blank time in ns */
8906        bool interlaced;    /* mode is interlaced */
8907        fixed20_12 vsc;    /* vertical scale ratio */
8908        u32 num_heads;     /* number of active crtcs */
8909        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8910        u32 lb_size;       /* line buffer allocated to pipe */
8911        u32 vtaps;         /* vertical scaler taps */
8912};
8913
8914/**
8915 * dce8_dram_bandwidth - get the dram bandwidth
8916 *
8917 * @wm: watermark calculation data
8918 *
8919 * Calculate the raw dram bandwidth (CIK).
8920 * Used for display watermark bandwidth calculations
8921 * Returns the dram bandwidth in MBytes/s
8922 */
8923static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8924{
8925        /* Calculate raw DRAM Bandwidth */
8926        fixed20_12 dram_efficiency; /* 0.7 */
8927        fixed20_12 yclk, dram_channels, bandwidth;
8928        fixed20_12 a;
8929
8930        a.full = dfixed_const(1000);
8931        yclk.full = dfixed_const(wm->yclk);
8932        yclk.full = dfixed_div(yclk, a);
8933        dram_channels.full = dfixed_const(wm->dram_channels * 4);
8934        a.full = dfixed_const(10);
8935        dram_efficiency.full = dfixed_const(7);
8936        dram_efficiency.full = dfixed_div(dram_efficiency, a);
8937        bandwidth.full = dfixed_mul(dram_channels, yclk);
8938        bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8939
8940        return dfixed_trunc(bandwidth);
8941}
8942
8943/**
8944 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8945 *
8946 * @wm: watermark calculation data
8947 *
8948 * Calculate the dram bandwidth used for display (CIK).
8949 * Used for display watermark bandwidth calculations
8950 * Returns the dram bandwidth for display in MBytes/s
8951 */
8952static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8953{
8954        /* Calculate DRAM Bandwidth and the part allocated to display. */
8955        fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8956        fixed20_12 yclk, dram_channels, bandwidth;
8957        fixed20_12 a;
8958
8959        a.full = dfixed_const(1000);
8960        yclk.full = dfixed_const(wm->yclk);
8961        yclk.full = dfixed_div(yclk, a);
8962        dram_channels.full = dfixed_const(wm->dram_channels * 4);
8963        a.full = dfixed_const(10);
8964        disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8965        disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8966        bandwidth.full = dfixed_mul(dram_channels, yclk);
8967        bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8968
8969        return dfixed_trunc(bandwidth);
8970}
8971
8972/**
8973 * dce8_data_return_bandwidth - get the data return bandwidth
8974 *
8975 * @wm: watermark calculation data
8976 *
8977 * Calculate the data return bandwidth used for display (CIK).
8978 * Used for display watermark bandwidth calculations
8979 * Returns the data return bandwidth in MBytes/s
8980 */
8981static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8982{
8983        /* Calculate the display Data return Bandwidth */
8984        fixed20_12 return_efficiency; /* 0.8 */
8985        fixed20_12 sclk, bandwidth;
8986        fixed20_12 a;
8987
8988        a.full = dfixed_const(1000);
8989        sclk.full = dfixed_const(wm->sclk);
8990        sclk.full = dfixed_div(sclk, a);
8991        a.full = dfixed_const(10);
8992        return_efficiency.full = dfixed_const(8);
8993        return_efficiency.full = dfixed_div(return_efficiency, a);
8994        a.full = dfixed_const(32);
8995        bandwidth.full = dfixed_mul(a, sclk);
8996        bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8997
8998        return dfixed_trunc(bandwidth);
8999}
9000
9001/**
9002 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9003 *
9004 * @wm: watermark calculation data
9005 *
9006 * Calculate the dmif bandwidth used for display (CIK).
9007 * Used for display watermark bandwidth calculations
9008 * Returns the dmif bandwidth in MBytes/s
9009 */
9010static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9011{
9012        /* Calculate the DMIF Request Bandwidth */
9013        fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9014        fixed20_12 disp_clk, bandwidth;
9015        fixed20_12 a, b;
9016
9017        a.full = dfixed_const(1000);
9018        disp_clk.full = dfixed_const(wm->disp_clk);
9019        disp_clk.full = dfixed_div(disp_clk, a);
9020        a.full = dfixed_const(32);
9021        b.full = dfixed_mul(a, disp_clk);
9022
9023        a.full = dfixed_const(10);
9024        disp_clk_request_efficiency.full = dfixed_const(8);
9025        disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9026
9027        bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9028
9029        return dfixed_trunc(bandwidth);
9030}
9031
9032/**
9033 * dce8_available_bandwidth - get the min available bandwidth
9034 *
9035 * @wm: watermark calculation data
9036 *
9037 * Calculate the min available bandwidth used for display (CIK).
9038 * Used for display watermark bandwidth calculations
9039 * Returns the min available bandwidth in MBytes/s
9040 */
9041static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9042{
9043        /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9044        u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9045        u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9046        u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9047
9048        return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9049}
9050
9051/**
9052 * dce8_average_bandwidth - get the average available bandwidth
9053 *
9054 * @wm: watermark calculation data
9055 *
9056 * Calculate the average available bandwidth used for display (CIK).
9057 * Used for display watermark bandwidth calculations
9058 * Returns the average available bandwidth in MBytes/s
9059 */
9060static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9061{
9062        /* Calculate the display mode Average Bandwidth
9063         * DisplayMode should contain the source and destination dimensions,
9064         * timing, etc.
9065         */
9066        fixed20_12 bpp;
9067        fixed20_12 line_time;
9068        fixed20_12 src_width;
9069        fixed20_12 bandwidth;
9070        fixed20_12 a;
9071
9072        a.full = dfixed_const(1000);
9073        line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9074        line_time.full = dfixed_div(line_time, a);
9075        bpp.full = dfixed_const(wm->bytes_per_pixel);
9076        src_width.full = dfixed_const(wm->src_width);
9077        bandwidth.full = dfixed_mul(src_width, bpp);
9078        bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9079        bandwidth.full = dfixed_div(bandwidth, line_time);
9080
9081        return dfixed_trunc(bandwidth);
9082}
9083
9084/**
9085 * dce8_latency_watermark - get the latency watermark
9086 *
9087 * @wm: watermark calculation data
9088 *
9089 * Calculate the latency watermark (CIK).
9090 * Used for display watermark bandwidth calculations
9091 * Returns the latency watermark in ns
9092 */
9093static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9094{
9095        /* First calculate the latency in ns */
9096        u32 mc_latency = 2000; /* 2000 ns. */
9097        u32 available_bandwidth = dce8_available_bandwidth(wm);
9098        u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9099        u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9100        u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9101        u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9102                (wm->num_heads * cursor_line_pair_return_time);
9103        u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9104        u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9105        u32 tmp, dmif_size = 12288;
9106        fixed20_12 a, b, c;
9107
9108        if (wm->num_heads == 0)
9109                return 0;
9110
9111        a.full = dfixed_const(2);
9112        b.full = dfixed_const(1);
9113        if ((wm->vsc.full > a.full) ||
9114            ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9115            (wm->vtaps >= 5) ||
9116            ((wm->vsc.full >= a.full) && wm->interlaced))
9117                max_src_lines_per_dst_line = 4;
9118        else
9119                max_src_lines_per_dst_line = 2;
9120
9121        a.full = dfixed_const(available_bandwidth);
9122        b.full = dfixed_const(wm->num_heads);
9123        a.full = dfixed_div(a, b);
9124        tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9125        tmp = min(dfixed_trunc(a), tmp);
9126
9127        lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9128
9129        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9130        b.full = dfixed_const(1000);
9131        c.full = dfixed_const(lb_fill_bw);
9132        b.full = dfixed_div(c, b);
9133        a.full = dfixed_div(a, b);
9134        line_fill_time = dfixed_trunc(a);
9135
9136        if (line_fill_time < wm->active_time)
9137                return latency;
9138        else
9139                return latency + (line_fill_time - wm->active_time);
9140
9141}
9142
9143/**
9144 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9145 * average and available dram bandwidth
9146 *
9147 * @wm: watermark calculation data
9148 *
9149 * Check if the display average bandwidth fits in the display
9150 * dram bandwidth (CIK).
9151 * Used for display watermark bandwidth calculations
9152 * Returns true if the display fits, false if not.
9153 */
9154static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9155{
9156        if (dce8_average_bandwidth(wm) <=
9157            (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9158                return true;
9159        else
9160                return false;
9161}
9162
9163/**
9164 * dce8_average_bandwidth_vs_available_bandwidth - check
9165 * average and available bandwidth
9166 *
9167 * @wm: watermark calculation data
9168 *
9169 * Check if the display average bandwidth fits in the display
9170 * available bandwidth (CIK).
9171 * Used for display watermark bandwidth calculations
9172 * Returns true if the display fits, false if not.
9173 */
9174static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9175{
9176        if (dce8_average_bandwidth(wm) <=
9177            (dce8_available_bandwidth(wm) / wm->num_heads))
9178                return true;
9179        else
9180                return false;
9181}
9182
9183/**
9184 * dce8_check_latency_hiding - check latency hiding
9185 *
9186 * @wm: watermark calculation data
9187 *
9188 * Check latency hiding (CIK).
9189 * Used for display watermark bandwidth calculations
9190 * Returns true if the display fits, false if not.
9191 */
9192static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9193{
9194        u32 lb_partitions = wm->lb_size / wm->src_width;
9195        u32 line_time = wm->active_time + wm->blank_time;
9196        u32 latency_tolerant_lines;
9197        u32 latency_hiding;
9198        fixed20_12 a;
9199
9200        a.full = dfixed_const(1);
9201        if (wm->vsc.full > a.full)
9202                latency_tolerant_lines = 1;
9203        else {
9204                if (lb_partitions <= (wm->vtaps + 1))
9205                        latency_tolerant_lines = 1;
9206                else
9207                        latency_tolerant_lines = 2;
9208        }
9209
9210        latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9211
9212        if (dce8_latency_watermark(wm) <= latency_hiding)
9213                return true;
9214        else
9215                return false;
9216}
9217
9218/**
9219 * dce8_program_watermarks - program display watermarks
9220 *
9221 * @rdev: radeon_device pointer
9222 * @radeon_crtc: the selected display controller
9223 * @lb_size: line buffer size
9224 * @num_heads: number of display controllers in use
9225 *
9226 * Calculate and program the display watermarks for the
9227 * selected display controller (CIK).
9228 */
9229static void dce8_program_watermarks(struct radeon_device *rdev,
9230                                    struct radeon_crtc *radeon_crtc,
9231                                    u32 lb_size, u32 num_heads)
9232{
9233        struct drm_display_mode *mode = &radeon_crtc->base.mode;
9234        struct dce8_wm_params wm_low, wm_high;
9235        u32 active_time;
9236        u32 line_time = 0;
9237        u32 latency_watermark_a = 0, latency_watermark_b = 0;
9238        u32 tmp, wm_mask;
9239
9240        if (radeon_crtc->base.enabled && num_heads && mode) {
9241                active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9242                                            (u32)mode->clock);
9243                line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9244                                          (u32)mode->clock);
9245                line_time = min(line_time, (u32)65535);
9246
9247                /* watermark for high clocks */
9248                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9249                    rdev->pm.dpm_enabled) {
9250                        wm_high.yclk =
9251                                radeon_dpm_get_mclk(rdev, false) * 10;
9252                        wm_high.sclk =
9253                                radeon_dpm_get_sclk(rdev, false) * 10;
9254                } else {
9255                        wm_high.yclk = rdev->pm.current_mclk * 10;
9256                        wm_high.sclk = rdev->pm.current_sclk * 10;
9257                }
9258
9259                wm_high.disp_clk = mode->clock;
9260                wm_high.src_width = mode->crtc_hdisplay;
9261                wm_high.active_time = active_time;
9262                wm_high.blank_time = line_time - wm_high.active_time;
9263                wm_high.interlaced = false;
9264                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9265                        wm_high.interlaced = true;
9266                wm_high.vsc = radeon_crtc->vsc;
9267                wm_high.vtaps = 1;
9268                if (radeon_crtc->rmx_type != RMX_OFF)
9269                        wm_high.vtaps = 2;
9270                wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9271                wm_high.lb_size = lb_size;
9272                wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9273                wm_high.num_heads = num_heads;
9274
9275                /* set for high clocks */
9276                latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9277
9278                /* possibly force display priority to high */
9279                /* should really do this at mode validation time... */
9280                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9281                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9282                    !dce8_check_latency_hiding(&wm_high) ||
9283                    (rdev->disp_priority == 2)) {
9284                        DRM_DEBUG_KMS("force priority to high\n");
9285                }
9286
9287                /* watermark for low clocks */
9288                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9289                    rdev->pm.dpm_enabled) {
9290                        wm_low.yclk =
9291                                radeon_dpm_get_mclk(rdev, true) * 10;
9292                        wm_low.sclk =
9293                                radeon_dpm_get_sclk(rdev, true) * 10;
9294                } else {
9295                        wm_low.yclk = rdev->pm.current_mclk * 10;
9296                        wm_low.sclk = rdev->pm.current_sclk * 10;
9297                }
9298
9299                wm_low.disp_clk = mode->clock;
9300                wm_low.src_width = mode->crtc_hdisplay;
9301                wm_low.active_time = active_time;
9302                wm_low.blank_time = line_time - wm_low.active_time;
9303                wm_low.interlaced = false;
9304                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9305                        wm_low.interlaced = true;
9306                wm_low.vsc = radeon_crtc->vsc;
9307                wm_low.vtaps = 1;
9308                if (radeon_crtc->rmx_type != RMX_OFF)
9309                        wm_low.vtaps = 2;
9310                wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9311                wm_low.lb_size = lb_size;
9312                wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9313                wm_low.num_heads = num_heads;
9314
9315                /* set for low clocks */
9316                latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9317
9318                /* possibly force display priority to high */
9319                /* should really do this at mode validation time... */
9320                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9321                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9322                    !dce8_check_latency_hiding(&wm_low) ||
9323                    (rdev->disp_priority == 2)) {
9324                        DRM_DEBUG_KMS("force priority to high\n");
9325                }
9326
9327                /* Save number of lines the linebuffer leads before the scanout */
9328                radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9329        }
9330
9331        /* select wm A */
9332        wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9333        tmp = wm_mask;
9334        tmp &= ~LATENCY_WATERMARK_MASK(3);
9335        tmp |= LATENCY_WATERMARK_MASK(1);
9336        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9337        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9338               (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9339                LATENCY_HIGH_WATERMARK(line_time)));
9340        /* select wm B */
9341        tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9342        tmp &= ~LATENCY_WATERMARK_MASK(3);
9343        tmp |= LATENCY_WATERMARK_MASK(2);
9344        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9345        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9346               (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9347                LATENCY_HIGH_WATERMARK(line_time)));
9348        /* restore original selection */
9349        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9350
9351        /* save values for DPM */
9352        radeon_crtc->line_time = line_time;
9353        radeon_crtc->wm_high = latency_watermark_a;
9354        radeon_crtc->wm_low = latency_watermark_b;
9355}
9356
9357/**
9358 * dce8_bandwidth_update - program display watermarks
9359 *
9360 * @rdev: radeon_device pointer
9361 *
9362 * Calculate and program the display watermarks and line
9363 * buffer allocation (CIK).
9364 */
9365void dce8_bandwidth_update(struct radeon_device *rdev)
9366{
9367        struct drm_display_mode *mode = NULL;
9368        u32 num_heads = 0, lb_size;
9369        int i;
9370
9371        if (!rdev->mode_info.mode_config_initialized)
9372                return;
9373
9374        radeon_update_display_priority(rdev);
9375
9376        for (i = 0; i < rdev->num_crtc; i++) {
9377                if (rdev->mode_info.crtcs[i]->base.enabled)
9378                        num_heads++;
9379        }
9380        for (i = 0; i < rdev->num_crtc; i++) {
9381                mode = &rdev->mode_info.crtcs[i]->base.mode;
9382                lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9383                dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9384        }
9385}
9386
9387/**
9388 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9389 *
9390 * @rdev: radeon_device pointer
9391 *
9392 * Fetches a GPU clock counter snapshot (SI).
9393 * Returns the 64 bit clock counter snapshot.
9394 */
9395uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9396{
9397        uint64_t clock;
9398
9399        mutex_lock(&rdev->gpu_clock_mutex);
9400        WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9401        clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9402                ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9403        mutex_unlock(&rdev->gpu_clock_mutex);
9404        return clock;
9405}
9406
9407static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9408                             u32 cntl_reg, u32 status_reg)
9409{
9410        int r, i;
9411        struct atom_clock_dividers dividers;
9412        uint32_t tmp;
9413
9414        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9415                                           clock, false, &dividers);
9416        if (r)
9417                return r;
9418
9419        tmp = RREG32_SMC(cntl_reg);
9420        tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9421        tmp |= dividers.post_divider;
9422        WREG32_SMC(cntl_reg, tmp);
9423
9424        for (i = 0; i < 100; i++) {
9425                if (RREG32_SMC(status_reg) & DCLK_STATUS)
9426                        break;
9427                mdelay(10);
9428        }
9429        if (i == 100)
9430                return -ETIMEDOUT;
9431
9432        return 0;
9433}
9434
9435int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9436{
9437        int r = 0;
9438
9439        r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9440        if (r)
9441                return r;
9442
9443        r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9444        return r;
9445}
9446
9447int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9448{
9449        int r, i;
9450        struct atom_clock_dividers dividers;
9451        u32 tmp;
9452
9453        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9454                                           ecclk, false, &dividers);
9455        if (r)
9456                return r;
9457
9458        for (i = 0; i < 100; i++) {
9459                if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9460                        break;
9461                mdelay(10);
9462        }
9463        if (i == 100)
9464                return -ETIMEDOUT;
9465
9466        tmp = RREG32_SMC(CG_ECLK_CNTL);
9467        tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9468        tmp |= dividers.post_divider;
9469        WREG32_SMC(CG_ECLK_CNTL, tmp);
9470
9471        for (i = 0; i < 100; i++) {
9472                if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9473                        break;
9474                mdelay(10);
9475        }
9476        if (i == 100)
9477                return -ETIMEDOUT;
9478
9479        return 0;
9480}
9481
9482static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9483{
9484        struct pci_dev *root = rdev->pdev->bus->self;
9485        enum pci_bus_speed speed_cap;
9486        u32 speed_cntl, current_data_rate;
9487        int i;
9488        u16 tmp16;
9489
9490        if (pci_is_root_bus(rdev->pdev->bus))
9491                return;
9492
9493        if (radeon_pcie_gen2 == 0)
9494                return;
9495
9496        if (rdev->flags & RADEON_IS_IGP)
9497                return;
9498
9499        if (!(rdev->flags & RADEON_IS_PCIE))
9500                return;
9501
9502        speed_cap = pcie_get_speed_cap(root);
9503        if (speed_cap == PCI_SPEED_UNKNOWN)
9504                return;
9505
9506        if ((speed_cap != PCIE_SPEED_8_0GT) &&
9507            (speed_cap != PCIE_SPEED_5_0GT))
9508                return;
9509
9510        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9511        current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9512                LC_CURRENT_DATA_RATE_SHIFT;
9513        if (speed_cap == PCIE_SPEED_8_0GT) {
9514                if (current_data_rate == 2) {
9515                        DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9516                        return;
9517                }
9518                DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9519        } else if (speed_cap == PCIE_SPEED_5_0GT) {
9520                if (current_data_rate == 1) {
9521                        DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9522                        return;
9523                }
9524                DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9525        }
9526
9527        if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9528                return;
9529
9530        if (speed_cap == PCIE_SPEED_8_0GT) {
9531                /* re-try equalization if gen3 is not already enabled */
9532                if (current_data_rate != 2) {
9533                        u16 bridge_cfg, gpu_cfg;
9534                        u16 bridge_cfg2, gpu_cfg2;
9535                        u32 max_lw, current_lw, tmp;
9536
9537                        pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9538                                                  &bridge_cfg);
9539                        pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
9540                                                  &gpu_cfg);
9541
9542                        tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9543                        pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
9544
9545                        tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9546                        pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
9547                                                   tmp16);
9548
9549                        tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9550                        max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9551                        current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9552
9553                        if (current_lw < max_lw) {
9554                                tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9555                                if (tmp & LC_RENEGOTIATION_SUPPORT) {
9556                                        tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9557                                        tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9558                                        tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9559                                        WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9560                                }
9561                        }
9562
9563                        for (i = 0; i < 10; i++) {
9564                                /* check status */
9565                                pcie_capability_read_word(rdev->pdev,
9566                                                          PCI_EXP_DEVSTA,
9567                                                          &tmp16);
9568                                if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9569                                        break;
9570
9571                                pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9572                                                          &bridge_cfg);
9573                                pcie_capability_read_word(rdev->pdev,
9574                                                          PCI_EXP_LNKCTL,
9575                                                          &gpu_cfg);
9576
9577                                pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9578                                                          &bridge_cfg2);
9579                                pcie_capability_read_word(rdev->pdev,
9580                                                          PCI_EXP_LNKCTL2,
9581                                                          &gpu_cfg2);
9582
9583                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9584                                tmp |= LC_SET_QUIESCE;
9585                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9586
9587                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9588                                tmp |= LC_REDO_EQ;
9589                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9590
9591                                msleep(100);
9592
9593                                /* linkctl */
9594                                pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9595                                                          &tmp16);
9596                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9597                                tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9598                                pcie_capability_write_word(root, PCI_EXP_LNKCTL,
9599                                                           tmp16);
9600
9601                                pcie_capability_read_word(rdev->pdev,
9602                                                          PCI_EXP_LNKCTL,
9603                                                          &tmp16);
9604                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9605                                tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9606                                pcie_capability_write_word(rdev->pdev,
9607                                                           PCI_EXP_LNKCTL,
9608                                                           tmp16);
9609
9610                                /* linkctl2 */
9611                                pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9612                                                          &tmp16);
9613                                tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9614                                           PCI_EXP_LNKCTL2_TX_MARGIN);
9615                                tmp16 |= (bridge_cfg2 &
9616                                          (PCI_EXP_LNKCTL2_ENTER_COMP |
9617                                           PCI_EXP_LNKCTL2_TX_MARGIN));
9618                                pcie_capability_write_word(root,
9619                                                           PCI_EXP_LNKCTL2,
9620                                                           tmp16);
9621
9622                                pcie_capability_read_word(rdev->pdev,
9623                                                          PCI_EXP_LNKCTL2,
9624                                                          &tmp16);
9625                                tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9626                                           PCI_EXP_LNKCTL2_TX_MARGIN);
9627                                tmp16 |= (gpu_cfg2 &
9628                                          (PCI_EXP_LNKCTL2_ENTER_COMP |
9629                                           PCI_EXP_LNKCTL2_TX_MARGIN));
9630                                pcie_capability_write_word(rdev->pdev,
9631                                                           PCI_EXP_LNKCTL2,
9632                                                           tmp16);
9633
9634                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9635                                tmp &= ~LC_SET_QUIESCE;
9636                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9637                        }
9638                }
9639        }
9640
9641        /* set the link speed */
9642        speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9643        speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9644        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9645
9646        pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
9647        tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
9648        if (speed_cap == PCIE_SPEED_8_0GT)
9649                tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9650        else if (speed_cap == PCIE_SPEED_5_0GT)
9651                tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9652        else
9653                tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9654        pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
9655
9656        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9657        speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9658        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9659
9660        for (i = 0; i < rdev->usec_timeout; i++) {
9661                speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9662                if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9663                        break;
9664                udelay(1);
9665        }
9666}
9667
9668static void cik_program_aspm(struct radeon_device *rdev)
9669{
9670        u32 data, orig;
9671        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9672        bool disable_clkreq = false;
9673
9674        if (radeon_aspm == 0)
9675                return;
9676
9677        /* XXX double check IGPs */
9678        if (rdev->flags & RADEON_IS_IGP)
9679                return;
9680
9681        if (!(rdev->flags & RADEON_IS_PCIE))
9682                return;
9683
9684        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9685        data &= ~LC_XMIT_N_FTS_MASK;
9686        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9687        if (orig != data)
9688                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9689
9690        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9691        data |= LC_GO_TO_RECOVERY;
9692        if (orig != data)
9693                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9694
9695        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9696        data |= P_IGNORE_EDB_ERR;
9697        if (orig != data)
9698                WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9699
9700        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9701        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9702        data |= LC_PMI_TO_L1_DIS;
9703        if (!disable_l0s)
9704                data |= LC_L0S_INACTIVITY(7);
9705
9706        if (!disable_l1) {
9707                data |= LC_L1_INACTIVITY(7);
9708                data &= ~LC_PMI_TO_L1_DIS;
9709                if (orig != data)
9710                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9711
9712                if (!disable_plloff_in_l1) {
9713                        bool clk_req_support;
9714
9715                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9716                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9717                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9718                        if (orig != data)
9719                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9720
9721                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9722                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9723                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9724                        if (orig != data)
9725                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9726
9727                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9728                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9729                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9730                        if (orig != data)
9731                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9732
9733                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9734                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9735                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9736                        if (orig != data)
9737                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9738
9739                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9740                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9741                        data |= LC_DYN_LANES_PWR_STATE(3);
9742                        if (orig != data)
9743                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9744
9745                        if (!disable_clkreq &&
9746                            !pci_is_root_bus(rdev->pdev->bus)) {
9747                                struct pci_dev *root = rdev->pdev->bus->self;
9748                                u32 lnkcap;
9749
9750                                clk_req_support = false;
9751                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9752                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9753                                        clk_req_support = true;
9754                        } else {
9755                                clk_req_support = false;
9756                        }
9757
9758                        if (clk_req_support) {
9759                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9760                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9761                                if (orig != data)
9762                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9763
9764                                orig = data = RREG32_SMC(THM_CLK_CNTL);
9765                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9766                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9767                                if (orig != data)
9768                                        WREG32_SMC(THM_CLK_CNTL, data);
9769
9770                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
9771                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9772                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9773                                if (orig != data)
9774                                        WREG32_SMC(MISC_CLK_CTRL, data);
9775
9776                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9777                                data &= ~BCLK_AS_XCLK;
9778                                if (orig != data)
9779                                        WREG32_SMC(CG_CLKPIN_CNTL, data);
9780
9781                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9782                                data &= ~FORCE_BIF_REFCLK_EN;
9783                                if (orig != data)
9784                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9785
9786                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9787                                data &= ~MPLL_CLKOUT_SEL_MASK;
9788                                data |= MPLL_CLKOUT_SEL(4);
9789                                if (orig != data)
9790                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9791                        }
9792                }
9793        } else {
9794                if (orig != data)
9795                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9796        }
9797
9798        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9799        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9800        if (orig != data)
9801                WREG32_PCIE_PORT(PCIE_CNTL2, data);
9802
9803        if (!disable_l0s) {
9804                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9805                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9806                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9807                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9808                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9809                                data &= ~LC_L0S_INACTIVITY_MASK;
9810                                if (orig != data)
9811                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9812                        }
9813                }
9814        }
9815}
9816