linux/drivers/gpu/drm/radeon/cik.c
<<
>>
Prefs
   1/*
   2 * Copyright 2012 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Alex Deucher
  23 */
  24
  25#include <linux/firmware.h>
  26#include <linux/slab.h>
  27#include <linux/module.h>
  28
  29#include <drm/drm_pci.h>
  30#include <drm/drm_vblank.h>
  31
  32#include "atom.h"
  33#include "cik_blit_shaders.h"
  34#include "cikd.h"
  35#include "clearstate_ci.h"
  36#include "radeon.h"
  37#include "radeon_asic.h"
  38#include "radeon_audio.h"
  39#include "radeon_ucode.h"
  40
  /*
   * Default SH_MEM_CONFIG value for gfx: only the alignment-mode field is
   * set, to SH_MEM_ALIGNMENT_MODE_UNALIGNED.
   */
  41#define SH_MEM_CONFIG_GFX_DEFAULT \
  42        ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
  43
  /*
   * Firmware images declared for this module via MODULE_FIRMWARE().
   *
   * Every ASIC name below (BONAIRE, HAWAII, KAVERI, KABINI, MULLINS) is
   * listed twice: once with upper-case file names and once with lower-case
   * file names.  The two sets are not identical: only the upper-case
   * BONAIRE/HAWAII sets carry an mc2 image, while the lower-case sets add
   * a k_smc image (bonaire, hawaii) and a mec2 image (kaveri).
   */
  44MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
  45MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
  46MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
  47MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
  48MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
  49MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
  50MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
  51MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
  52MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
  53
  54MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
  55MODULE_FIRMWARE("radeon/bonaire_me.bin");
  56MODULE_FIRMWARE("radeon/bonaire_ce.bin");
  57MODULE_FIRMWARE("radeon/bonaire_mec.bin");
  58MODULE_FIRMWARE("radeon/bonaire_mc.bin");
  59MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
  60MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
  61MODULE_FIRMWARE("radeon/bonaire_smc.bin");
  62MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
  63
  64MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
  65MODULE_FIRMWARE("radeon/HAWAII_me.bin");
  66MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
  67MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
  68MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
  69MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
  70MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
  71MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
  72MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
  73
  74MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
  75MODULE_FIRMWARE("radeon/hawaii_me.bin");
  76MODULE_FIRMWARE("radeon/hawaii_ce.bin");
  77MODULE_FIRMWARE("radeon/hawaii_mec.bin");
  78MODULE_FIRMWARE("radeon/hawaii_mc.bin");
  79MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
  80MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
  81MODULE_FIRMWARE("radeon/hawaii_smc.bin");
  82MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
  83
  84MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
  85MODULE_FIRMWARE("radeon/KAVERI_me.bin");
  86MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
  87MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
  88MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
  89MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
  90
  91MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
  92MODULE_FIRMWARE("radeon/kaveri_me.bin");
  93MODULE_FIRMWARE("radeon/kaveri_ce.bin");
  94MODULE_FIRMWARE("radeon/kaveri_mec.bin");
  95MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
  96MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
  97MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
  98
  99MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
 100MODULE_FIRMWARE("radeon/KABINI_me.bin");
 101MODULE_FIRMWARE("radeon/KABINI_ce.bin");
 102MODULE_FIRMWARE("radeon/KABINI_mec.bin");
 103MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
 104MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
 105
 106MODULE_FIRMWARE("radeon/kabini_pfp.bin");
 107MODULE_FIRMWARE("radeon/kabini_me.bin");
 108MODULE_FIRMWARE("radeon/kabini_ce.bin");
 109MODULE_FIRMWARE("radeon/kabini_mec.bin");
 110MODULE_FIRMWARE("radeon/kabini_rlc.bin");
 111MODULE_FIRMWARE("radeon/kabini_sdma.bin");
 112
 113MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
 114MODULE_FIRMWARE("radeon/MULLINS_me.bin");
 115MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
 116MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
 117MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
 118MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
 119
 120MODULE_FIRMWARE("radeon/mullins_pfp.bin");
 121MODULE_FIRMWARE("radeon/mullins_me.bin");
 122MODULE_FIRMWARE("radeon/mullins_ce.bin");
 123MODULE_FIRMWARE("radeon/mullins_mec.bin");
 124MODULE_FIRMWARE("radeon/mullins_rlc.bin");
 125MODULE_FIRMWARE("radeon/mullins_sdma.bin");
 126
 /*
  * Prototypes needed before first use in this file.
  *
  * The `extern` entries are helpers implemented in other source files
  * (their prefixes -- r600, evergreen, sumo, si, cik_sdma, vce_v2_0 --
  * suggest which; those definitions are not visible here).  The `static`
  * entries are forward declarations of functions private to this file.
  */
 127extern int r600_ih_ring_alloc(struct radeon_device *rdev);
 128extern void r600_ih_ring_fini(struct radeon_device *rdev);
 129extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
 130extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
 131extern bool evergreen_is_display_hung(struct radeon_device *rdev);
 132extern void sumo_rlc_fini(struct radeon_device *rdev);
 133extern int sumo_rlc_init(struct radeon_device *rdev);
 134extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 135extern void si_rlc_reset(struct radeon_device *rdev);
 136extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
 137static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 138extern int cik_sdma_resume(struct radeon_device *rdev);
 139extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 140extern void cik_sdma_fini(struct radeon_device *rdev);
 141extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
 142static void cik_rlc_stop(struct radeon_device *rdev);
 143static void cik_pcie_gen3_enable(struct radeon_device *rdev);
 144static void cik_program_aspm(struct radeon_device *rdev);
 145static void cik_init_pg(struct radeon_device *rdev);
 146static void cik_init_cg(struct radeon_device *rdev);
 147static void cik_fini_pg(struct radeon_device *rdev);
 148static void cik_fini_cg(struct radeon_device *rdev);
 149static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
 150                                          bool enable);
 151
 152/**
 153 * cik_get_allowed_info_register - fetch the register for the info ioctl
 154 *
 155 * @rdev: radeon_device pointer
 156 * @reg: register offset in bytes
 157 * @val: register value
 158 *
 159 * Returns 0 for success or -EINVAL for an invalid register
 160 *
 161 */
 162int cik_get_allowed_info_register(struct radeon_device *rdev,
 163                                  u32 reg, u32 *val)
 164{
 165        switch (reg) {
 166        case GRBM_STATUS:
 167        case GRBM_STATUS2:
 168        case GRBM_STATUS_SE0:
 169        case GRBM_STATUS_SE1:
 170        case GRBM_STATUS_SE2:
 171        case GRBM_STATUS_SE3:
 172        case SRBM_STATUS:
 173        case SRBM_STATUS2:
 174        case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
 175        case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
 176        case UVD_STATUS:
 177        /* TODO VCE */
 178                *val = RREG32(reg);
 179                return 0;
 180        default:
 181                return -EINVAL;
 182        }
 183}
 184
 185/*
 186 * Indirect registers accessor
 187 */
 188u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
 189{
 190        unsigned long flags;
 191        u32 r;
 192
 193        spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 194        WREG32(CIK_DIDT_IND_INDEX, (reg));
 195        r = RREG32(CIK_DIDT_IND_DATA);
 196        spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 197        return r;
 198}
 199
 200void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 201{
 202        unsigned long flags;
 203
 204        spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 205        WREG32(CIK_DIDT_IND_INDEX, (reg));
 206        WREG32(CIK_DIDT_IND_DATA, (v));
 207        spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 208}
 209
 210/* get temperature in millidegrees */
 211int ci_get_temp(struct radeon_device *rdev)
 212{
 213        u32 temp;
 214        int actual_temp = 0;
 215
 216        temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
 217                CTF_TEMP_SHIFT;
 218
 219        if (temp & 0x200)
 220                actual_temp = 255;
 221        else
 222                actual_temp = temp & 0x1ff;
 223
 224        actual_temp = actual_temp * 1000;
 225
 226        return actual_temp;
 227}
 228
 229/* get temperature in millidegrees */
 230int kv_get_temp(struct radeon_device *rdev)
 231{
 232        u32 temp;
 233        int actual_temp = 0;
 234
 235        temp = RREG32_SMC(0xC0300E0C);
 236
 237        if (temp)
 238                actual_temp = (temp / 8) - 49;
 239        else
 240                actual_temp = 0;
 241
 242        actual_temp = actual_temp * 1000;
 243
 244        return actual_temp;
 245}
 246
 247/*
 248 * Indirect registers accessor
 249 */
 250u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
 251{
 252        unsigned long flags;
 253        u32 r;
 254
 255        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 256        WREG32(PCIE_INDEX, reg);
 257        (void)RREG32(PCIE_INDEX);
 258        r = RREG32(PCIE_DATA);
 259        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 260        return r;
 261}
 262
 263void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 264{
 265        unsigned long flags;
 266
 267        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 268        WREG32(PCIE_INDEX, reg);
 269        (void)RREG32(PCIE_INDEX);
 270        WREG32(PCIE_DATA, v);
 271        (void)RREG32(PCIE_DATA);
 272        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 273}
 274
 /*
  * spectre_rlc_save_restore_register_list - static table of register
  * descriptors.
  *
  * Most entries come in pairs: a descriptor word built as
  * (selector << 16) | (register byte offset >> 2), followed by a
  * 0x00000000 word.  Two bare words (0x3 and 0x5) break that pattern, and
  * the five descriptors after 0x5 have no trailing zero word.
  *
  * NOTE(review): judging by the name this is the list handed to the RLC
  * for register save/restore, with the zero words acting as value slots
  * and 0x3/0x5 as section markers -- the consumer is not visible in this
  * chunk, confirm before relying on it.
  */
 275static const u32 spectre_rlc_save_restore_register_list[] =
 276{
 277        (0x0e00 << 16) | (0xc12c >> 2),
 278        0x00000000,
 279        (0x0e00 << 16) | (0xc140 >> 2),
 280        0x00000000,
 281        (0x0e00 << 16) | (0xc150 >> 2),
 282        0x00000000,
 283        (0x0e00 << 16) | (0xc15c >> 2),
 284        0x00000000,
 285        (0x0e00 << 16) | (0xc168 >> 2),
 286        0x00000000,
 287        (0x0e00 << 16) | (0xc170 >> 2),
 288        0x00000000,
 289        (0x0e00 << 16) | (0xc178 >> 2),
 290        0x00000000,
 291        (0x0e00 << 16) | (0xc204 >> 2),
 292        0x00000000,
 293        (0x0e00 << 16) | (0xc2b4 >> 2),
 294        0x00000000,
 295        (0x0e00 << 16) | (0xc2b8 >> 2),
 296        0x00000000,
 297        (0x0e00 << 16) | (0xc2bc >> 2),
 298        0x00000000,
 299        (0x0e00 << 16) | (0xc2c0 >> 2),
 300        0x00000000,
 301        (0x0e00 << 16) | (0x8228 >> 2),
 302        0x00000000,
 303        (0x0e00 << 16) | (0x829c >> 2),
 304        0x00000000,
 305        (0x0e00 << 16) | (0x869c >> 2),
 306        0x00000000,
 307        (0x0600 << 16) | (0x98f4 >> 2),
 308        0x00000000,
 309        (0x0e00 << 16) | (0x98f8 >> 2),
 310        0x00000000,
 311        (0x0e00 << 16) | (0x9900 >> 2),
 312        0x00000000,
 313        (0x0e00 << 16) | (0xc260 >> 2),
 314        0x00000000,
 315        (0x0e00 << 16) | (0x90e8 >> 2),
 316        0x00000000,
 317        (0x0e00 << 16) | (0x3c000 >> 2),
 318        0x00000000,
 319        (0x0e00 << 16) | (0x3c00c >> 2),
 320        0x00000000,
 321        (0x0e00 << 16) | (0x8c1c >> 2),
 322        0x00000000,
 323        (0x0e00 << 16) | (0x9700 >> 2),
 324        0x00000000,
 325        (0x0e00 << 16) | (0xcd20 >> 2),
 326        0x00000000,
 327        (0x4e00 << 16) | (0xcd20 >> 2),
 328        0x00000000,
 329        (0x5e00 << 16) | (0xcd20 >> 2),
 330        0x00000000,
 331        (0x6e00 << 16) | (0xcd20 >> 2),
 332        0x00000000,
 333        (0x7e00 << 16) | (0xcd20 >> 2),
 334        0x00000000,
 335        (0x8e00 << 16) | (0xcd20 >> 2),
 336        0x00000000,
 337        (0x9e00 << 16) | (0xcd20 >> 2),
 338        0x00000000,
 339        (0xae00 << 16) | (0xcd20 >> 2),
 340        0x00000000,
 341        (0xbe00 << 16) | (0xcd20 >> 2),
 342        0x00000000,
 343        (0x0e00 << 16) | (0x89bc >> 2),
 344        0x00000000,
 345        (0x0e00 << 16) | (0x8900 >> 2),
 346        0x00000000,
 347        0x3,
 348        (0x0e00 << 16) | (0xc130 >> 2),
 349        0x00000000,
 350        (0x0e00 << 16) | (0xc134 >> 2),
 351        0x00000000,
 352        (0x0e00 << 16) | (0xc1fc >> 2),
 353        0x00000000,
 354        (0x0e00 << 16) | (0xc208 >> 2),
 355        0x00000000,
 356        (0x0e00 << 16) | (0xc264 >> 2),
 357        0x00000000,
 358        (0x0e00 << 16) | (0xc268 >> 2),
 359        0x00000000,
 360        (0x0e00 << 16) | (0xc26c >> 2),
 361        0x00000000,
 362        (0x0e00 << 16) | (0xc270 >> 2),
 363        0x00000000,
 364        (0x0e00 << 16) | (0xc274 >> 2),
 365        0x00000000,
 366        (0x0e00 << 16) | (0xc278 >> 2),
 367        0x00000000,
 368        (0x0e00 << 16) | (0xc27c >> 2),
 369        0x00000000,
 370        (0x0e00 << 16) | (0xc280 >> 2),
 371        0x00000000,
 372        (0x0e00 << 16) | (0xc284 >> 2),
 373        0x00000000,
 374        (0x0e00 << 16) | (0xc288 >> 2),
 375        0x00000000,
 376        (0x0e00 << 16) | (0xc28c >> 2),
 377        0x00000000,
 378        (0x0e00 << 16) | (0xc290 >> 2),
 379        0x00000000,
 380        (0x0e00 << 16) | (0xc294 >> 2),
 381        0x00000000,
 382        (0x0e00 << 16) | (0xc298 >> 2),
 383        0x00000000,
 384        (0x0e00 << 16) | (0xc29c >> 2),
 385        0x00000000,
 386        (0x0e00 << 16) | (0xc2a0 >> 2),
 387        0x00000000,
 388        (0x0e00 << 16) | (0xc2a4 >> 2),
 389        0x00000000,
 390        (0x0e00 << 16) | (0xc2a8 >> 2),
 391        0x00000000,
 392        (0x0e00 << 16) | (0xc2ac  >> 2),
 393        0x00000000,
 394        (0x0e00 << 16) | (0xc2b0 >> 2),
 395        0x00000000,
 396        (0x0e00 << 16) | (0x301d0 >> 2),
 397        0x00000000,
 398        (0x0e00 << 16) | (0x30238 >> 2),
 399        0x00000000,
 400        (0x0e00 << 16) | (0x30250 >> 2),
 401        0x00000000,
 402        (0x0e00 << 16) | (0x30254 >> 2),
 403        0x00000000,
 404        (0x0e00 << 16) | (0x30258 >> 2),
 405        0x00000000,
 406        (0x0e00 << 16) | (0x3025c >> 2),
 407        0x00000000,
 408        (0x4e00 << 16) | (0xc900 >> 2),
 409        0x00000000,
 410        (0x5e00 << 16) | (0xc900 >> 2),
 411        0x00000000,
 412        (0x6e00 << 16) | (0xc900 >> 2),
 413        0x00000000,
 414        (0x7e00 << 16) | (0xc900 >> 2),
 415        0x00000000,
 416        (0x8e00 << 16) | (0xc900 >> 2),
 417        0x00000000,
 418        (0x9e00 << 16) | (0xc900 >> 2),
 419        0x00000000,
 420        (0xae00 << 16) | (0xc900 >> 2),
 421        0x00000000,
 422        (0xbe00 << 16) | (0xc900 >> 2),
 423        0x00000000,
 424        (0x4e00 << 16) | (0xc904 >> 2),
 425        0x00000000,
 426        (0x5e00 << 16) | (0xc904 >> 2),
 427        0x00000000,
 428        (0x6e00 << 16) | (0xc904 >> 2),
 429        0x00000000,
 430        (0x7e00 << 16) | (0xc904 >> 2),
 431        0x00000000,
 432        (0x8e00 << 16) | (0xc904 >> 2),
 433        0x00000000,
 434        (0x9e00 << 16) | (0xc904 >> 2),
 435        0x00000000,
 436        (0xae00 << 16) | (0xc904 >> 2),
 437        0x00000000,
 438        (0xbe00 << 16) | (0xc904 >> 2),
 439        0x00000000,
 440        (0x4e00 << 16) | (0xc908 >> 2),
 441        0x00000000,
 442        (0x5e00 << 16) | (0xc908 >> 2),
 443        0x00000000,
 444        (0x6e00 << 16) | (0xc908 >> 2),
 445        0x00000000,
 446        (0x7e00 << 16) | (0xc908 >> 2),
 447        0x00000000,
 448        (0x8e00 << 16) | (0xc908 >> 2),
 449        0x00000000,
 450        (0x9e00 << 16) | (0xc908 >> 2),
 451        0x00000000,
 452        (0xae00 << 16) | (0xc908 >> 2),
 453        0x00000000,
 454        (0xbe00 << 16) | (0xc908 >> 2),
 455        0x00000000,
 456        (0x4e00 << 16) | (0xc90c >> 2),
 457        0x00000000,
 458        (0x5e00 << 16) | (0xc90c >> 2),
 459        0x00000000,
 460        (0x6e00 << 16) | (0xc90c >> 2),
 461        0x00000000,
 462        (0x7e00 << 16) | (0xc90c >> 2),
 463        0x00000000,
 464        (0x8e00 << 16) | (0xc90c >> 2),
 465        0x00000000,
 466        (0x9e00 << 16) | (0xc90c >> 2),
 467        0x00000000,
 468        (0xae00 << 16) | (0xc90c >> 2),
 469        0x00000000,
 470        (0xbe00 << 16) | (0xc90c >> 2),
 471        0x00000000,
 472        (0x4e00 << 16) | (0xc910 >> 2),
 473        0x00000000,
 474        (0x5e00 << 16) | (0xc910 >> 2),
 475        0x00000000,
 476        (0x6e00 << 16) | (0xc910 >> 2),
 477        0x00000000,
 478        (0x7e00 << 16) | (0xc910 >> 2),
 479        0x00000000,
 480        (0x8e00 << 16) | (0xc910 >> 2),
 481        0x00000000,
 482        (0x9e00 << 16) | (0xc910 >> 2),
 483        0x00000000,
 484        (0xae00 << 16) | (0xc910 >> 2),
 485        0x00000000,
 486        (0xbe00 << 16) | (0xc910 >> 2),
 487        0x00000000,
 488        (0x0e00 << 16) | (0xc99c >> 2),
 489        0x00000000,
 490        (0x0e00 << 16) | (0x9834 >> 2),
 491        0x00000000,
 492        (0x0000 << 16) | (0x30f00 >> 2),
 493        0x00000000,
 494        (0x0001 << 16) | (0x30f00 >> 2),
 495        0x00000000,
 496        (0x0000 << 16) | (0x30f04 >> 2),
 497        0x00000000,
 498        (0x0001 << 16) | (0x30f04 >> 2),
 499        0x00000000,
 500        (0x0000 << 16) | (0x30f08 >> 2),
 501        0x00000000,
 502        (0x0001 << 16) | (0x30f08 >> 2),
 503        0x00000000,
 504        (0x0000 << 16) | (0x30f0c >> 2),
 505        0x00000000,
 506        (0x0001 << 16) | (0x30f0c >> 2),
 507        0x00000000,
 508        (0x0600 << 16) | (0x9b7c >> 2),
 509        0x00000000,
 510        (0x0e00 << 16) | (0x8a14 >> 2),
 511        0x00000000,
 512        (0x0e00 << 16) | (0x8a18 >> 2),
 513        0x00000000,
 514        (0x0600 << 16) | (0x30a00 >> 2),
 515        0x00000000,
 516        (0x0e00 << 16) | (0x8bf0 >> 2),
 517        0x00000000,
 518        (0x0e00 << 16) | (0x8bcc >> 2),
 519        0x00000000,
 520        (0x0e00 << 16) | (0x8b24 >> 2),
 521        0x00000000,
 522        (0x0e00 << 16) | (0x30a04 >> 2),
 523        0x00000000,
 524        (0x0600 << 16) | (0x30a10 >> 2),
 525        0x00000000,
 526        (0x0600 << 16) | (0x30a14 >> 2),
 527        0x00000000,
 528        (0x0600 << 16) | (0x30a18 >> 2),
 529        0x00000000,
 530        (0x0600 << 16) | (0x30a2c >> 2),
 531        0x00000000,
 532        (0x0e00 << 16) | (0xc700 >> 2),
 533        0x00000000,
 534        (0x0e00 << 16) | (0xc704 >> 2),
 535        0x00000000,
 536        (0x0e00 << 16) | (0xc708 >> 2),
 537        0x00000000,
 538        (0x0e00 << 16) | (0xc768 >> 2),
 539        0x00000000,
 540        (0x0400 << 16) | (0xc770 >> 2),
 541        0x00000000,
 542        (0x0400 << 16) | (0xc774 >> 2),
 543        0x00000000,
 544        (0x0400 << 16) | (0xc778 >> 2),
 545        0x00000000,
 546        (0x0400 << 16) | (0xc77c >> 2),
 547        0x00000000,
 548        (0x0400 << 16) | (0xc780 >> 2),
 549        0x00000000,
 550        (0x0400 << 16) | (0xc784 >> 2),
 551        0x00000000,
 552        (0x0400 << 16) | (0xc788 >> 2),
 553        0x00000000,
 554        (0x0400 << 16) | (0xc78c >> 2),
 555        0x00000000,
 556        (0x0400 << 16) | (0xc798 >> 2),
 557        0x00000000,
 558        (0x0400 << 16) | (0xc79c >> 2),
 559        0x00000000,
 560        (0x0400 << 16) | (0xc7a0 >> 2),
 561        0x00000000,
 562        (0x0400 << 16) | (0xc7a4 >> 2),
 563        0x00000000,
 564        (0x0400 << 16) | (0xc7a8 >> 2),
 565        0x00000000,
 566        (0x0400 << 16) | (0xc7ac >> 2),
 567        0x00000000,
 568        (0x0400 << 16) | (0xc7b0 >> 2),
 569        0x00000000,
 570        (0x0400 << 16) | (0xc7b4 >> 2),
 571        0x00000000,
 572        (0x0e00 << 16) | (0x9100 >> 2),
 573        0x00000000,
 574        (0x0e00 << 16) | (0x3c010 >> 2),
 575        0x00000000,
 576        (0x0e00 << 16) | (0x92a8 >> 2),
 577        0x00000000,
 578        (0x0e00 << 16) | (0x92ac >> 2),
 579        0x00000000,
 580        (0x0e00 << 16) | (0x92b4 >> 2),
 581        0x00000000,
 582        (0x0e00 << 16) | (0x92b8 >> 2),
 583        0x00000000,
 584        (0x0e00 << 16) | (0x92bc >> 2),
 585        0x00000000,
 586        (0x0e00 << 16) | (0x92c0 >> 2),
 587        0x00000000,
 588        (0x0e00 << 16) | (0x92c4 >> 2),
 589        0x00000000,
 590        (0x0e00 << 16) | (0x92c8 >> 2),
 591        0x00000000,
 592        (0x0e00 << 16) | (0x92cc >> 2),
 593        0x00000000,
 594        (0x0e00 << 16) | (0x92d0 >> 2),
 595        0x00000000,
 596        (0x0e00 << 16) | (0x8c00 >> 2),
 597        0x00000000,
 598        (0x0e00 << 16) | (0x8c04 >> 2),
 599        0x00000000,
 600        (0x0e00 << 16) | (0x8c20 >> 2),
 601        0x00000000,
 602        (0x0e00 << 16) | (0x8c38 >> 2),
 603        0x00000000,
 604        (0x0e00 << 16) | (0x8c3c >> 2),
 605        0x00000000,
 606        (0x0e00 << 16) | (0xae00 >> 2),
 607        0x00000000,
 608        (0x0e00 << 16) | (0x9604 >> 2),
 609        0x00000000,
 610        (0x0e00 << 16) | (0xac08 >> 2),
 611        0x00000000,
 612        (0x0e00 << 16) | (0xac0c >> 2),
 613        0x00000000,
 614        (0x0e00 << 16) | (0xac10 >> 2),
 615        0x00000000,
 616        (0x0e00 << 16) | (0xac14 >> 2),
 617        0x00000000,
 618        (0x0e00 << 16) | (0xac58 >> 2),
 619        0x00000000,
 620        (0x0e00 << 16) | (0xac68 >> 2),
 621        0x00000000,
 622        (0x0e00 << 16) | (0xac6c >> 2),
 623        0x00000000,
 624        (0x0e00 << 16) | (0xac70 >> 2),
 625        0x00000000,
 626        (0x0e00 << 16) | (0xac74 >> 2),
 627        0x00000000,
 628        (0x0e00 << 16) | (0xac78 >> 2),
 629        0x00000000,
 630        (0x0e00 << 16) | (0xac7c >> 2),
 631        0x00000000,
 632        (0x0e00 << 16) | (0xac80 >> 2),
 633        0x00000000,
 634        (0x0e00 << 16) | (0xac84 >> 2),
 635        0x00000000,
 636        (0x0e00 << 16) | (0xac88 >> 2),
 637        0x00000000,
 638        (0x0e00 << 16) | (0xac8c >> 2),
 639        0x00000000,
 640        (0x0e00 << 16) | (0x970c >> 2),
 641        0x00000000,
 642        (0x0e00 << 16) | (0x9714 >> 2),
 643        0x00000000,
 644        (0x0e00 << 16) | (0x9718 >> 2),
 645        0x00000000,
 646        (0x0e00 << 16) | (0x971c >> 2),
 647        0x00000000,
 648        (0x0e00 << 16) | (0x31068 >> 2),
 649        0x00000000,
 650        (0x4e00 << 16) | (0x31068 >> 2),
 651        0x00000000,
 652        (0x5e00 << 16) | (0x31068 >> 2),
 653        0x00000000,
 654        (0x6e00 << 16) | (0x31068 >> 2),
 655        0x00000000,
 656        (0x7e00 << 16) | (0x31068 >> 2),
 657        0x00000000,
 658        (0x8e00 << 16) | (0x31068 >> 2),
 659        0x00000000,
 660        (0x9e00 << 16) | (0x31068 >> 2),
 661        0x00000000,
 662        (0xae00 << 16) | (0x31068 >> 2),
 663        0x00000000,
 664        (0xbe00 << 16) | (0x31068 >> 2),
 665        0x00000000,
 666        (0x0e00 << 16) | (0xcd10 >> 2),
 667        0x00000000,
 668        (0x0e00 << 16) | (0xcd14 >> 2),
 669        0x00000000,
 670        (0x0e00 << 16) | (0x88b0 >> 2),
 671        0x00000000,
 672        (0x0e00 << 16) | (0x88b4 >> 2),
 673        0x00000000,
 674        (0x0e00 << 16) | (0x88b8 >> 2),
 675        0x00000000,
 676        (0x0e00 << 16) | (0x88bc >> 2),
 677        0x00000000,
 678        (0x0400 << 16) | (0x89c0 >> 2),
 679        0x00000000,
 680        (0x0e00 << 16) | (0x88c4 >> 2),
 681        0x00000000,
 682        (0x0e00 << 16) | (0x88c8 >> 2),
 683        0x00000000,
 684        (0x0e00 << 16) | (0x88d0 >> 2),
 685        0x00000000,
 686        (0x0e00 << 16) | (0x88d4 >> 2),
 687        0x00000000,
 688        (0x0e00 << 16) | (0x88d8 >> 2),
 689        0x00000000,
 690        (0x0e00 << 16) | (0x8980 >> 2),
 691        0x00000000,
 692        (0x0e00 << 16) | (0x30938 >> 2),
 693        0x00000000,
 694        (0x0e00 << 16) | (0x3093c >> 2),
 695        0x00000000,
 696        (0x0e00 << 16) | (0x30940 >> 2),
 697        0x00000000,
 698        (0x0e00 << 16) | (0x89a0 >> 2),
 699        0x00000000,
 700        (0x0e00 << 16) | (0x30900 >> 2),
 701        0x00000000,
 702        (0x0e00 << 16) | (0x30904 >> 2),
 703        0x00000000,
 704        (0x0e00 << 16) | (0x89b4 >> 2),
 705        0x00000000,
 706        (0x0e00 << 16) | (0x3c210 >> 2),
 707        0x00000000,
 708        (0x0e00 << 16) | (0x3c214 >> 2),
 709        0x00000000,
 710        (0x0e00 << 16) | (0x3c218 >> 2),
 711        0x00000000,
 712        (0x0e00 << 16) | (0x8904 >> 2),
 713        0x00000000,
 714        0x5,
 715        (0x0e00 << 16) | (0x8c28 >> 2),
 716        (0x0e00 << 16) | (0x8c2c >> 2),
 717        (0x0e00 << 16) | (0x8c30 >> 2),
 718        (0x0e00 << 16) | (0x8c34 >> 2),
 719        (0x0e00 << 16) | (0x9600 >> 2),
 720};
 721
 722static const u32 kalindi_rlc_save_restore_register_list[] =
 723{
 724        (0x0e00 << 16) | (0xc12c >> 2),
 725        0x00000000,
 726        (0x0e00 << 16) | (0xc140 >> 2),
 727        0x00000000,
 728        (0x0e00 << 16) | (0xc150 >> 2),
 729        0x00000000,
 730        (0x0e00 << 16) | (0xc15c >> 2),
 731        0x00000000,
 732        (0x0e00 << 16) | (0xc168 >> 2),
 733        0x00000000,
 734        (0x0e00 << 16) | (0xc170 >> 2),
 735        0x00000000,
 736        (0x0e00 << 16) | (0xc204 >> 2),
 737        0x00000000,
 738        (0x0e00 << 16) | (0xc2b4 >> 2),
 739        0x00000000,
 740        (0x0e00 << 16) | (0xc2b8 >> 2),
 741        0x00000000,
 742        (0x0e00 << 16) | (0xc2bc >> 2),
 743        0x00000000,
 744        (0x0e00 << 16) | (0xc2c0 >> 2),
 745        0x00000000,
 746        (0x0e00 << 16) | (0x8228 >> 2),
 747        0x00000000,
 748        (0x0e00 << 16) | (0x829c >> 2),
 749        0x00000000,
 750        (0x0e00 << 16) | (0x869c >> 2),
 751        0x00000000,
 752        (0x0600 << 16) | (0x98f4 >> 2),
 753        0x00000000,
 754        (0x0e00 << 16) | (0x98f8 >> 2),
 755        0x00000000,
 756        (0x0e00 << 16) | (0x9900 >> 2),
 757        0x00000000,
 758        (0x0e00 << 16) | (0xc260 >> 2),
 759        0x00000000,
 760        (0x0e00 << 16) | (0x90e8 >> 2),
 761        0x00000000,
 762        (0x0e00 << 16) | (0x3c000 >> 2),
 763        0x00000000,
 764        (0x0e00 << 16) | (0x3c00c >> 2),
 765        0x00000000,
 766        (0x0e00 << 16) | (0x8c1c >> 2),
 767        0x00000000,
 768        (0x0e00 << 16) | (0x9700 >> 2),
 769        0x00000000,
 770        (0x0e00 << 16) | (0xcd20 >> 2),
 771        0x00000000,
 772        (0x4e00 << 16) | (0xcd20 >> 2),
 773        0x00000000,
 774        (0x5e00 << 16) | (0xcd20 >> 2),
 775        0x00000000,
 776        (0x6e00 << 16) | (0xcd20 >> 2),
 777        0x00000000,
 778        (0x7e00 << 16) | (0xcd20 >> 2),
 779        0x00000000,
 780        (0x0e00 << 16) | (0x89bc >> 2),
 781        0x00000000,
 782        (0x0e00 << 16) | (0x8900 >> 2),
 783        0x00000000,
 784        0x3,
 785        (0x0e00 << 16) | (0xc130 >> 2),
 786        0x00000000,
 787        (0x0e00 << 16) | (0xc134 >> 2),
 788        0x00000000,
 789        (0x0e00 << 16) | (0xc1fc >> 2),
 790        0x00000000,
 791        (0x0e00 << 16) | (0xc208 >> 2),
 792        0x00000000,
 793        (0x0e00 << 16) | (0xc264 >> 2),
 794        0x00000000,
 795        (0x0e00 << 16) | (0xc268 >> 2),
 796        0x00000000,
 797        (0x0e00 << 16) | (0xc26c >> 2),
 798        0x00000000,
 799        (0x0e00 << 16) | (0xc270 >> 2),
 800        0x00000000,
 801        (0x0e00 << 16) | (0xc274 >> 2),
 802        0x00000000,
 803        (0x0e00 << 16) | (0xc28c >> 2),
 804        0x00000000,
 805        (0x0e00 << 16) | (0xc290 >> 2),
 806        0x00000000,
 807        (0x0e00 << 16) | (0xc294 >> 2),
 808        0x00000000,
 809        (0x0e00 << 16) | (0xc298 >> 2),
 810        0x00000000,
 811        (0x0e00 << 16) | (0xc2a0 >> 2),
 812        0x00000000,
 813        (0x0e00 << 16) | (0xc2a4 >> 2),
 814        0x00000000,
 815        (0x0e00 << 16) | (0xc2a8 >> 2),
 816        0x00000000,
 817        (0x0e00 << 16) | (0xc2ac >> 2),
 818        0x00000000,
 819        (0x0e00 << 16) | (0x301d0 >> 2),
 820        0x00000000,
 821        (0x0e00 << 16) | (0x30238 >> 2),
 822        0x00000000,
 823        (0x0e00 << 16) | (0x30250 >> 2),
 824        0x00000000,
 825        (0x0e00 << 16) | (0x30254 >> 2),
 826        0x00000000,
 827        (0x0e00 << 16) | (0x30258 >> 2),
 828        0x00000000,
 829        (0x0e00 << 16) | (0x3025c >> 2),
 830        0x00000000,
 831        (0x4e00 << 16) | (0xc900 >> 2),
 832        0x00000000,
 833        (0x5e00 << 16) | (0xc900 >> 2),
 834        0x00000000,
 835        (0x6e00 << 16) | (0xc900 >> 2),
 836        0x00000000,
 837        (0x7e00 << 16) | (0xc900 >> 2),
 838        0x00000000,
 839        (0x4e00 << 16) | (0xc904 >> 2),
 840        0x00000000,
 841        (0x5e00 << 16) | (0xc904 >> 2),
 842        0x00000000,
 843        (0x6e00 << 16) | (0xc904 >> 2),
 844        0x00000000,
 845        (0x7e00 << 16) | (0xc904 >> 2),
 846        0x00000000,
 847        (0x4e00 << 16) | (0xc908 >> 2),
 848        0x00000000,
 849        (0x5e00 << 16) | (0xc908 >> 2),
 850        0x00000000,
 851        (0x6e00 << 16) | (0xc908 >> 2),
 852        0x00000000,
 853        (0x7e00 << 16) | (0xc908 >> 2),
 854        0x00000000,
 855        (0x4e00 << 16) | (0xc90c >> 2),
 856        0x00000000,
 857        (0x5e00 << 16) | (0xc90c >> 2),
 858        0x00000000,
 859        (0x6e00 << 16) | (0xc90c >> 2),
 860        0x00000000,
 861        (0x7e00 << 16) | (0xc90c >> 2),
 862        0x00000000,
 863        (0x4e00 << 16) | (0xc910 >> 2),
 864        0x00000000,
 865        (0x5e00 << 16) | (0xc910 >> 2),
 866        0x00000000,
 867        (0x6e00 << 16) | (0xc910 >> 2),
 868        0x00000000,
 869        (0x7e00 << 16) | (0xc910 >> 2),
 870        0x00000000,
 871        (0x0e00 << 16) | (0xc99c >> 2),
 872        0x00000000,
 873        (0x0e00 << 16) | (0x9834 >> 2),
 874        0x00000000,
 875        (0x0000 << 16) | (0x30f00 >> 2),
 876        0x00000000,
 877        (0x0000 << 16) | (0x30f04 >> 2),
 878        0x00000000,
 879        (0x0000 << 16) | (0x30f08 >> 2),
 880        0x00000000,
 881        (0x0000 << 16) | (0x30f0c >> 2),
 882        0x00000000,
 883        (0x0600 << 16) | (0x9b7c >> 2),
 884        0x00000000,
 885        (0x0e00 << 16) | (0x8a14 >> 2),
 886        0x00000000,
 887        (0x0e00 << 16) | (0x8a18 >> 2),
 888        0x00000000,
 889        (0x0600 << 16) | (0x30a00 >> 2),
 890        0x00000000,
 891        (0x0e00 << 16) | (0x8bf0 >> 2),
 892        0x00000000,
 893        (0x0e00 << 16) | (0x8bcc >> 2),
 894        0x00000000,
 895        (0x0e00 << 16) | (0x8b24 >> 2),
 896        0x00000000,
 897        (0x0e00 << 16) | (0x30a04 >> 2),
 898        0x00000000,
 899        (0x0600 << 16) | (0x30a10 >> 2),
 900        0x00000000,
 901        (0x0600 << 16) | (0x30a14 >> 2),
 902        0x00000000,
 903        (0x0600 << 16) | (0x30a18 >> 2),
 904        0x00000000,
 905        (0x0600 << 16) | (0x30a2c >> 2),
 906        0x00000000,
 907        (0x0e00 << 16) | (0xc700 >> 2),
 908        0x00000000,
 909        (0x0e00 << 16) | (0xc704 >> 2),
 910        0x00000000,
 911        (0x0e00 << 16) | (0xc708 >> 2),
 912        0x00000000,
 913        (0x0e00 << 16) | (0xc768 >> 2),
 914        0x00000000,
 915        (0x0400 << 16) | (0xc770 >> 2),
 916        0x00000000,
 917        (0x0400 << 16) | (0xc774 >> 2),
 918        0x00000000,
 919        (0x0400 << 16) | (0xc798 >> 2),
 920        0x00000000,
 921        (0x0400 << 16) | (0xc79c >> 2),
 922        0x00000000,
 923        (0x0e00 << 16) | (0x9100 >> 2),
 924        0x00000000,
 925        (0x0e00 << 16) | (0x3c010 >> 2),
 926        0x00000000,
 927        (0x0e00 << 16) | (0x8c00 >> 2),
 928        0x00000000,
 929        (0x0e00 << 16) | (0x8c04 >> 2),
 930        0x00000000,
 931        (0x0e00 << 16) | (0x8c20 >> 2),
 932        0x00000000,
 933        (0x0e00 << 16) | (0x8c38 >> 2),
 934        0x00000000,
 935        (0x0e00 << 16) | (0x8c3c >> 2),
 936        0x00000000,
 937        (0x0e00 << 16) | (0xae00 >> 2),
 938        0x00000000,
 939        (0x0e00 << 16) | (0x9604 >> 2),
 940        0x00000000,
 941        (0x0e00 << 16) | (0xac08 >> 2),
 942        0x00000000,
 943        (0x0e00 << 16) | (0xac0c >> 2),
 944        0x00000000,
 945        (0x0e00 << 16) | (0xac10 >> 2),
 946        0x00000000,
 947        (0x0e00 << 16) | (0xac14 >> 2),
 948        0x00000000,
 949        (0x0e00 << 16) | (0xac58 >> 2),
 950        0x00000000,
 951        (0x0e00 << 16) | (0xac68 >> 2),
 952        0x00000000,
 953        (0x0e00 << 16) | (0xac6c >> 2),
 954        0x00000000,
 955        (0x0e00 << 16) | (0xac70 >> 2),
 956        0x00000000,
 957        (0x0e00 << 16) | (0xac74 >> 2),
 958        0x00000000,
 959        (0x0e00 << 16) | (0xac78 >> 2),
 960        0x00000000,
 961        (0x0e00 << 16) | (0xac7c >> 2),
 962        0x00000000,
 963        (0x0e00 << 16) | (0xac80 >> 2),
 964        0x00000000,
 965        (0x0e00 << 16) | (0xac84 >> 2),
 966        0x00000000,
 967        (0x0e00 << 16) | (0xac88 >> 2),
 968        0x00000000,
 969        (0x0e00 << 16) | (0xac8c >> 2),
 970        0x00000000,
 971        (0x0e00 << 16) | (0x970c >> 2),
 972        0x00000000,
 973        (0x0e00 << 16) | (0x9714 >> 2),
 974        0x00000000,
 975        (0x0e00 << 16) | (0x9718 >> 2),
 976        0x00000000,
 977        (0x0e00 << 16) | (0x971c >> 2),
 978        0x00000000,
 979        (0x0e00 << 16) | (0x31068 >> 2),
 980        0x00000000,
 981        (0x4e00 << 16) | (0x31068 >> 2),
 982        0x00000000,
 983        (0x5e00 << 16) | (0x31068 >> 2),
 984        0x00000000,
 985        (0x6e00 << 16) | (0x31068 >> 2),
 986        0x00000000,
 987        (0x7e00 << 16) | (0x31068 >> 2),
 988        0x00000000,
 989        (0x0e00 << 16) | (0xcd10 >> 2),
 990        0x00000000,
 991        (0x0e00 << 16) | (0xcd14 >> 2),
 992        0x00000000,
 993        (0x0e00 << 16) | (0x88b0 >> 2),
 994        0x00000000,
 995        (0x0e00 << 16) | (0x88b4 >> 2),
 996        0x00000000,
 997        (0x0e00 << 16) | (0x88b8 >> 2),
 998        0x00000000,
 999        (0x0e00 << 16) | (0x88bc >> 2),
1000        0x00000000,
1001        (0x0400 << 16) | (0x89c0 >> 2),
1002        0x00000000,
1003        (0x0e00 << 16) | (0x88c4 >> 2),
1004        0x00000000,
1005        (0x0e00 << 16) | (0x88c8 >> 2),
1006        0x00000000,
1007        (0x0e00 << 16) | (0x88d0 >> 2),
1008        0x00000000,
1009        (0x0e00 << 16) | (0x88d4 >> 2),
1010        0x00000000,
1011        (0x0e00 << 16) | (0x88d8 >> 2),
1012        0x00000000,
1013        (0x0e00 << 16) | (0x8980 >> 2),
1014        0x00000000,
1015        (0x0e00 << 16) | (0x30938 >> 2),
1016        0x00000000,
1017        (0x0e00 << 16) | (0x3093c >> 2),
1018        0x00000000,
1019        (0x0e00 << 16) | (0x30940 >> 2),
1020        0x00000000,
1021        (0x0e00 << 16) | (0x89a0 >> 2),
1022        0x00000000,
1023        (0x0e00 << 16) | (0x30900 >> 2),
1024        0x00000000,
1025        (0x0e00 << 16) | (0x30904 >> 2),
1026        0x00000000,
1027        (0x0e00 << 16) | (0x89b4 >> 2),
1028        0x00000000,
1029        (0x0e00 << 16) | (0x3e1fc >> 2),
1030        0x00000000,
1031        (0x0e00 << 16) | (0x3c210 >> 2),
1032        0x00000000,
1033        (0x0e00 << 16) | (0x3c214 >> 2),
1034        0x00000000,
1035        (0x0e00 << 16) | (0x3c218 >> 2),
1036        0x00000000,
1037        (0x0e00 << 16) | (0x8904 >> 2),
1038        0x00000000,
1039        0x5,
1040        (0x0e00 << 16) | (0x8c28 >> 2),
1041        (0x0e00 << 16) | (0x8c2c >> 2),
1042        (0x0e00 << 16) | (0x8c30 >> 2),
1043        (0x0e00 << 16) | (0x8c34 >> 2),
1044        (0x0e00 << 16) | (0x9600 >> 2),
1045};
1046
/*
 * Bonaire "spm" golden setting (a single entry).
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_BONAIRE, after the
 * mgcg/cgcg, golden and common tables.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1051
/*
 * Bonaire "common" golden settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_BONAIRE, after
 * bonaire_golden_registers.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1059
/*
 * Bonaire golden register settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_BONAIRE, right after
 * bonaire_mgcg_cgcg_init.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032,
};
1104
/*
 * Bonaire mgcg/cgcg init sequence.
 *
 * This is the first table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_BONAIRE.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0a8: the same five values repeated seven times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1190
/*
 * Spectre "spm" golden setting (a single entry).
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_KAVERI, after the
 * mgcg/cgcg, golden and common tables.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1195
/*
 * Spectre "common" golden settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_KAVERI, after
 * spectre_golden_registers.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1203
/*
 * Spectre golden register settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_KAVERI, right after
 * spectre_mgcg_cgcg_init.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] = {
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001,
};
1232
/*
 * Spectre mgcg/cgcg init sequence.
 *
 * This is the first table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_KAVERI.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0bc: the same five values repeated eight times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1323
/*
 * Kalindi "spm" golden setting (a single entry).
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for both CHIP_KABINI and
 * CHIP_MULLINS, as the last table of each sequence.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1328
/*
 * Kalindi "common" golden settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for both CHIP_KABINI and
 * CHIP_MULLINS, after the per-chip golden register table.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1336
/*
 * Kalindi golden register settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_KABINI, right after
 * kalindi_mgcg_cgcg_init.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] = {
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000,
};
1370
/*
 * Kalindi mgcg/cgcg init sequence.
 *
 * This is the first table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for both CHIP_KABINI and
 * CHIP_MULLINS.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c044: the same five values repeated twice */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1429
/*
 * Hawaii "spm" golden setting (a single entry).
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_HAWAII, after the
 * mgcg/cgcg, golden and common tables.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1434
/*
 * Hawaii "common" golden settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_HAWAII, after
 * hawaii_golden_registers.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] = {
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003,
};
1443
/*
 * Hawaii golden register settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_HAWAII, right after
 * hawaii_mgcg_cgcg_init.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000,
};
1483
/*
 * Hawaii mgcg/cgcg init sequence.
 *
 * This is the first table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_HAWAII.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	/* 0x3c020..0x3c0f8: the same five values repeated eleven times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1594
/*
 * Godavari golden register settings.
 *
 * cik_init_golden_registers() hands this table to
 * radeon_program_register_sequence() for CHIP_MULLINS, between
 * kalindi_mgcg_cgcg_init and kalindi_golden_common_registers.
 *
 * NOTE(review): each row looks like a (register offset, mask, value)
 * triple; confirm against radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] = {
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/*
	 * This offset used to read 0x98302, which is not dword aligned and
	 * appears in no other table.  The mask/value pair is the one
	 * kalindi_golden_registers programs at 0x9834, directly after 0x9830.
	 */
	0x9834, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000,
};
1630
1631
1632static void cik_init_golden_registers(struct radeon_device *rdev)
1633{
1634        switch (rdev->family) {
1635        case CHIP_BONAIRE:
1636                radeon_program_register_sequence(rdev,
1637                                                 bonaire_mgcg_cgcg_init,
1638                                                 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1639                radeon_program_register_sequence(rdev,
1640                                                 bonaire_golden_registers,
1641                                                 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1642                radeon_program_register_sequence(rdev,
1643                                                 bonaire_golden_common_registers,
1644                                                 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1645                radeon_program_register_sequence(rdev,
1646                                                 bonaire_golden_spm_registers,
1647                                                 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1648                break;
1649        case CHIP_KABINI:
1650                radeon_program_register_sequence(rdev,
1651                                                 kalindi_mgcg_cgcg_init,
1652                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1653                radeon_program_register_sequence(rdev,
1654                                                 kalindi_golden_registers,
1655                                                 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1656                radeon_program_register_sequence(rdev,
1657                                                 kalindi_golden_common_registers,
1658                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1659                radeon_program_register_sequence(rdev,
1660                                                 kalindi_golden_spm_registers,
1661                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1662                break;
1663        case CHIP_MULLINS:
1664                radeon_program_register_sequence(rdev,
1665                                                 kalindi_mgcg_cgcg_init,
1666                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1667                radeon_program_register_sequence(rdev,
1668                                                 godavari_golden_registers,
1669                                                 (const u32)ARRAY_SIZE(godavari_golden_registers));
1670                radeon_program_register_sequence(rdev,
1671                                                 kalindi_golden_common_registers,
1672                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1673                radeon_program_register_sequence(rdev,
1674                                                 kalindi_golden_spm_registers,
1675                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1676                break;
1677        case CHIP_KAVERI:
1678                radeon_program_register_sequence(rdev,
1679                                                 spectre_mgcg_cgcg_init,
1680                                                 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1681                radeon_program_register_sequence(rdev,
1682                                                 spectre_golden_registers,
1683                                                 (const u32)ARRAY_SIZE(spectre_golden_registers));
1684                radeon_program_register_sequence(rdev,
1685                                                 spectre_golden_common_registers,
1686                                                 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1687                radeon_program_register_sequence(rdev,
1688                                                 spectre_golden_spm_registers,
1689                                                 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1690                break;
1691        case CHIP_HAWAII:
1692                radeon_program_register_sequence(rdev,
1693                                                 hawaii_mgcg_cgcg_init,
1694                                                 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1695                radeon_program_register_sequence(rdev,
1696                                                 hawaii_golden_registers,
1697                                                 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1698                radeon_program_register_sequence(rdev,
1699                                                 hawaii_golden_common_registers,
1700                                                 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1701                radeon_program_register_sequence(rdev,
1702                                                 hawaii_golden_spm_registers,
1703                                                 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1704                break;
1705        default:
1706                break;
1707        }
1708}
1709
1710/**
1711 * cik_get_xclk - get the xclk
1712 *
1713 * @rdev: radeon_device pointer
1714 *
1715 * Returns the reference clock used by the gfx engine
1716 * (CIK).
1717 */
1718u32 cik_get_xclk(struct radeon_device *rdev)
1719{
1720        u32 reference_clock = rdev->clock.spll.reference_freq;
1721
1722        if (rdev->flags & RADEON_IS_IGP) {
1723                if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724                        return reference_clock / 2;
1725        } else {
1726                if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727                        return reference_clock / 4;
1728        }
1729        return reference_clock;
1730}
1731
1732/**
1733 * cik_mm_rdoorbell - read a doorbell dword
1734 *
1735 * @rdev: radeon_device pointer
1736 * @index: doorbell index
1737 *
1738 * Returns the value in the doorbell aperture at the
1739 * requested doorbell index (CIK).
1740 */
1741u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742{
1743        if (index < rdev->doorbell.num_doorbells) {
1744                return readl(rdev->doorbell.ptr + index);
1745        } else {
1746                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747                return 0;
1748        }
1749}
1750
1751/**
1752 * cik_mm_wdoorbell - write a doorbell dword
1753 *
1754 * @rdev: radeon_device pointer
1755 * @index: doorbell index
1756 * @v: value to write
1757 *
1758 * Writes @v to the doorbell aperture at the
1759 * requested doorbell index (CIK).
1760 */
1761void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762{
1763        if (index < rdev->doorbell.num_doorbells) {
1764                writel(v, rdev->doorbell.ptr + index);
1765        } else {
1766                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767        }
1768}
1769
1770#define BONAIRE_IO_MC_REGS_SIZE 36
1771
1772static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1773{
1774        {0x00000070, 0x04400000},
1775        {0x00000071, 0x80c01803},
1776        {0x00000072, 0x00004004},
1777        {0x00000073, 0x00000100},
1778        {0x00000074, 0x00ff0000},
1779        {0x00000075, 0x34000000},
1780        {0x00000076, 0x08000014},
1781        {0x00000077, 0x00cc08ec},
1782        {0x00000078, 0x00000400},
1783        {0x00000079, 0x00000000},
1784        {0x0000007a, 0x04090000},
1785        {0x0000007c, 0x00000000},
1786        {0x0000007e, 0x4408a8e8},
1787        {0x0000007f, 0x00000304},
1788        {0x00000080, 0x00000000},
1789        {0x00000082, 0x00000001},
1790        {0x00000083, 0x00000002},
1791        {0x00000084, 0xf3e4f400},
1792        {0x00000085, 0x052024e3},
1793        {0x00000087, 0x00000000},
1794        {0x00000088, 0x01000000},
1795        {0x0000008a, 0x1c0a0000},
1796        {0x0000008b, 0xff010000},
1797        {0x0000008d, 0xffffefff},
1798        {0x0000008e, 0xfff3efff},
1799        {0x0000008f, 0xfff3efbf},
1800        {0x00000092, 0xf7ffffff},
1801        {0x00000093, 0xffffff7f},
1802        {0x00000095, 0x00101101},
1803        {0x00000096, 0x00000fff},
1804        {0x00000097, 0x00116fff},
1805        {0x00000098, 0x60010000},
1806        {0x00000099, 0x10010000},
1807        {0x0000009a, 0x00006000},
1808        {0x0000009b, 0x00001000},
1809        {0x0000009f, 0x00b48000}
1810};
1811
1812#define HAWAII_IO_MC_REGS_SIZE 22
1813
1814static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1815{
1816        {0x0000007d, 0x40000000},
1817        {0x0000007e, 0x40180304},
1818        {0x0000007f, 0x0000ff00},
1819        {0x00000081, 0x00000000},
1820        {0x00000083, 0x00000800},
1821        {0x00000086, 0x00000000},
1822        {0x00000087, 0x00000100},
1823        {0x00000088, 0x00020100},
1824        {0x00000089, 0x00000000},
1825        {0x0000008b, 0x00040000},
1826        {0x0000008c, 0x00000100},
1827        {0x0000008e, 0xff010000},
1828        {0x00000090, 0xffffefff},
1829        {0x00000091, 0xfff3efff},
1830        {0x00000092, 0xfff3efbf},
1831        {0x00000093, 0xf7ffffff},
1832        {0x00000094, 0xffffff7f},
1833        {0x00000095, 0x00000fff},
1834        {0x00000096, 0x00116fff},
1835        {0x00000097, 0x60010000},
1836        {0x00000098, 0x10010000},
1837        {0x0000009f, 0x00c79000}
1838};
1839
1840
1841/**
1842 * cik_srbm_select - select specific register instances
1843 *
1844 * @rdev: radeon_device pointer
1845 * @me: selected ME (micro engine)
1846 * @pipe: pipe
1847 * @queue: queue
1848 * @vmid: VMID
1849 *
1850 * Switches the currently active registers instances.  Some
1851 * registers are instanced per VMID, others are instanced per
1852 * me/pipe/queue combination.
1853 */
1854static void cik_srbm_select(struct radeon_device *rdev,
1855                            u32 me, u32 pipe, u32 queue, u32 vmid)
1856{
1857        u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858                             MEID(me & 0x3) |
1859                             VMID(vmid & 0xf) |
1860                             QUEUEID(queue & 0x7));
1861        WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862}
1863
1864/* ucode loading */
1865/**
1866 * ci_mc_load_microcode - load MC ucode into the hw
1867 *
1868 * @rdev: radeon_device pointer
1869 *
1870 * Load the GDDR MC ucode into the hw (CIK).
1871 * Returns 0 on success, error on failure.
1872 */
1873int ci_mc_load_microcode(struct radeon_device *rdev)
1874{
1875        const __be32 *fw_data = NULL;
1876        const __le32 *new_fw_data = NULL;
1877        u32 running, tmp;
1878        u32 *io_mc_regs = NULL;
1879        const __le32 *new_io_mc_regs = NULL;
1880        int i, regs_size, ucode_size;
1881
1882        if (!rdev->mc_fw)
1883                return -EINVAL;
1884
1885        if (rdev->new_fw) {
1886                const struct mc_firmware_header_v1_0 *hdr =
1887                        (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1888
1889                radeon_ucode_print_mc_hdr(&hdr->header);
1890
1891                regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1892                new_io_mc_regs = (const __le32 *)
1893                        (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1894                ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1895                new_fw_data = (const __le32 *)
1896                        (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1897        } else {
1898                ucode_size = rdev->mc_fw->size / 4;
1899
1900                switch (rdev->family) {
1901                case CHIP_BONAIRE:
1902                        io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1903                        regs_size = BONAIRE_IO_MC_REGS_SIZE;
1904                        break;
1905                case CHIP_HAWAII:
1906                        io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1907                        regs_size = HAWAII_IO_MC_REGS_SIZE;
1908                        break;
1909                default:
1910                        return -EINVAL;
1911                }
1912                fw_data = (const __be32 *)rdev->mc_fw->data;
1913        }
1914
1915        running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1916
1917        if (running == 0) {
1918                /* reset the engine and set to writable */
1919                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920                WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922                /* load mc io regs */
1923                for (i = 0; i < regs_size; i++) {
1924                        if (rdev->new_fw) {
1925                                WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926                                WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927                        } else {
1928                                WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929                                WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930                        }
1931                }
1932
1933                tmp = RREG32(MC_SEQ_MISC0);
1934                if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935                        WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936                        WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937                        WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938                        WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939                }
1940
1941                /* load the MC ucode */
1942                for (i = 0; i < ucode_size; i++) {
1943                        if (rdev->new_fw)
1944                                WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945                        else
1946                                WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947                }
1948
1949                /* put the engine back into the active state */
1950                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951                WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952                WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954                /* wait for training to complete */
1955                for (i = 0; i < rdev->usec_timeout; i++) {
1956                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957                                break;
1958                        udelay(1);
1959                }
1960                for (i = 0; i < rdev->usec_timeout; i++) {
1961                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962                                break;
1963                        udelay(1);
1964                }
1965        }
1966
1967        return 0;
1968}
1969
1970/**
1971 * cik_init_microcode - load ucode images from disk
1972 *
1973 * @rdev: radeon_device pointer
1974 *
1975 * Use the firmware interface to load the ucode images into
1976 * the driver (not loaded into hw).
1977 * Returns 0 on success, error on failure.
1978 */
1979static int cik_init_microcode(struct radeon_device *rdev)
1980{
1981        const char *chip_name;
1982        const char *new_chip_name;
1983        size_t pfp_req_size, me_req_size, ce_req_size,
1984                mec_req_size, rlc_req_size, mc_req_size = 0,
1985                sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986        char fw_name[30];
1987        int new_fw = 0;
1988        int err;
1989        int num_fw;
1990        bool new_smc = false;
1991
1992        DRM_DEBUG("\n");
1993
1994        switch (rdev->family) {
1995        case CHIP_BONAIRE:
1996                chip_name = "BONAIRE";
1997                if ((rdev->pdev->revision == 0x80) ||
1998                    (rdev->pdev->revision == 0x81) ||
1999                    (rdev->pdev->device == 0x665f))
2000                        new_smc = true;
2001                new_chip_name = "bonaire";
2002                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003                me_req_size = CIK_ME_UCODE_SIZE * 4;
2004                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007                mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008                mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010                smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011                num_fw = 8;
2012                break;
2013        case CHIP_HAWAII:
2014                chip_name = "HAWAII";
2015                if (rdev->pdev->revision == 0x80)
2016                        new_smc = true;
2017                new_chip_name = "hawaii";
2018                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019                me_req_size = CIK_ME_UCODE_SIZE * 4;
2020                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023                mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024                mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026                smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027                num_fw = 8;
2028                break;
2029        case CHIP_KAVERI:
2030                chip_name = "KAVERI";
2031                new_chip_name = "kaveri";
2032                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033                me_req_size = CIK_ME_UCODE_SIZE * 4;
2034                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036                rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038                num_fw = 7;
2039                break;
2040        case CHIP_KABINI:
2041                chip_name = "KABINI";
2042                new_chip_name = "kabini";
2043                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044                me_req_size = CIK_ME_UCODE_SIZE * 4;
2045                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047                rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049                num_fw = 6;
2050                break;
2051        case CHIP_MULLINS:
2052                chip_name = "MULLINS";
2053                new_chip_name = "mullins";
2054                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055                me_req_size = CIK_ME_UCODE_SIZE * 4;
2056                ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058                rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060                num_fw = 6;
2061                break;
2062        default: BUG();
2063        }
2064
2065        DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066
2067        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068        err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069        if (err) {
2070                snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071                err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072                if (err)
2073                        goto out;
2074                if (rdev->pfp_fw->size != pfp_req_size) {
2075                        pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076                               rdev->pfp_fw->size, fw_name);
2077                        err = -EINVAL;
2078                        goto out;
2079                }
2080        } else {
2081                err = radeon_ucode_validate(rdev->pfp_fw);
2082                if (err) {
2083                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084                               fw_name);
2085                        goto out;
2086                } else {
2087                        new_fw++;
2088                }
2089        }
2090
2091        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093        if (err) {
2094                snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095                err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096                if (err)
2097                        goto out;
2098                if (rdev->me_fw->size != me_req_size) {
2099                        pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100                               rdev->me_fw->size, fw_name);
2101                        err = -EINVAL;
2102                }
2103        } else {
2104                err = radeon_ucode_validate(rdev->me_fw);
2105                if (err) {
2106                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107                               fw_name);
2108                        goto out;
2109                } else {
2110                        new_fw++;
2111                }
2112        }
2113
2114        snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115        err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116        if (err) {
2117                snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118                err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119                if (err)
2120                        goto out;
2121                if (rdev->ce_fw->size != ce_req_size) {
2122                        pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123                               rdev->ce_fw->size, fw_name);
2124                        err = -EINVAL;
2125                }
2126        } else {
2127                err = radeon_ucode_validate(rdev->ce_fw);
2128                if (err) {
2129                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130                               fw_name);
2131                        goto out;
2132                } else {
2133                        new_fw++;
2134                }
2135        }
2136
2137        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138        err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139        if (err) {
2140                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141                err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142                if (err)
2143                        goto out;
2144                if (rdev->mec_fw->size != mec_req_size) {
2145                        pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146                               rdev->mec_fw->size, fw_name);
2147                        err = -EINVAL;
2148                }
2149        } else {
2150                err = radeon_ucode_validate(rdev->mec_fw);
2151                if (err) {
2152                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153                               fw_name);
2154                        goto out;
2155                } else {
2156                        new_fw++;
2157                }
2158        }
2159
2160        if (rdev->family == CHIP_KAVERI) {
2161                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162                err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163                if (err) {
2164                        goto out;
2165                } else {
2166                        err = radeon_ucode_validate(rdev->mec2_fw);
2167                        if (err) {
2168                                goto out;
2169                        } else {
2170                                new_fw++;
2171                        }
2172                }
2173        }
2174
2175        snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176        err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177        if (err) {
2178                snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179                err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180                if (err)
2181                        goto out;
2182                if (rdev->rlc_fw->size != rlc_req_size) {
2183                        pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184                               rdev->rlc_fw->size, fw_name);
2185                        err = -EINVAL;
2186                }
2187        } else {
2188                err = radeon_ucode_validate(rdev->rlc_fw);
2189                if (err) {
2190                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191                               fw_name);
2192                        goto out;
2193                } else {
2194                        new_fw++;
2195                }
2196        }
2197
2198        snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199        err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200        if (err) {
2201                snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202                err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203                if (err)
2204                        goto out;
2205                if (rdev->sdma_fw->size != sdma_req_size) {
2206                        pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207                               rdev->sdma_fw->size, fw_name);
2208                        err = -EINVAL;
2209                }
2210        } else {
2211                err = radeon_ucode_validate(rdev->sdma_fw);
2212                if (err) {
2213                        pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214                               fw_name);
2215                        goto out;
2216                } else {
2217                        new_fw++;
2218                }
2219        }
2220
2221        /* No SMC, MC ucode on APUs */
2222        if (!(rdev->flags & RADEON_IS_IGP)) {
2223                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225                if (err) {
2226                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227                        err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228                        if (err) {
2229                                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230                                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231                                if (err)
2232                                        goto out;
2233                        }
2234                        if ((rdev->mc_fw->size != mc_req_size) &&
2235                            (rdev->mc_fw->size != mc2_req_size)){
2236                                pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237                                       rdev->mc_fw->size, fw_name);
2238                                err = -EINVAL;
2239                        }
2240                        DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241                } else {
2242                        err = radeon_ucode_validate(rdev->mc_fw);
2243                        if (err) {
2244                                pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245                                       fw_name);
2246                                goto out;
2247                        } else {
2248                                new_fw++;
2249                        }
2250                }
2251
2252                if (new_smc)
2253                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254                else
2255                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256                err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257                if (err) {
2258                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259                        err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260                        if (err) {
2261                                pr_err("smc: error loading firmware \"%s\"\n",
2262                                       fw_name);
2263                                release_firmware(rdev->smc_fw);
2264                                rdev->smc_fw = NULL;
2265                                err = 0;
2266                        } else if (rdev->smc_fw->size != smc_req_size) {
2267                                pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268                                       rdev->smc_fw->size, fw_name);
2269                                err = -EINVAL;
2270                        }
2271                } else {
2272                        err = radeon_ucode_validate(rdev->smc_fw);
2273                        if (err) {
2274                                pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275                                       fw_name);
2276                                goto out;
2277                        } else {
2278                                new_fw++;
2279                        }
2280                }
2281        }
2282
2283        if (new_fw == 0) {
2284                rdev->new_fw = false;
2285        } else if (new_fw < num_fw) {
2286                pr_err("ci_fw: mixing new and old firmware!\n");
2287                err = -EINVAL;
2288        } else {
2289                rdev->new_fw = true;
2290        }
2291
2292out:
2293        if (err) {
2294                if (err != -EINVAL)
2295                        pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296                               fw_name);
2297                release_firmware(rdev->pfp_fw);
2298                rdev->pfp_fw = NULL;
2299                release_firmware(rdev->me_fw);
2300                rdev->me_fw = NULL;
2301                release_firmware(rdev->ce_fw);
2302                rdev->ce_fw = NULL;
2303                release_firmware(rdev->mec_fw);
2304                rdev->mec_fw = NULL;
2305                release_firmware(rdev->mec2_fw);
2306                rdev->mec2_fw = NULL;
2307                release_firmware(rdev->rlc_fw);
2308                rdev->rlc_fw = NULL;
2309                release_firmware(rdev->sdma_fw);
2310                rdev->sdma_fw = NULL;
2311                release_firmware(rdev->mc_fw);
2312                rdev->mc_fw = NULL;
2313                release_firmware(rdev->smc_fw);
2314                rdev->smc_fw = NULL;
2315        }
2316        return err;
2317}
2318
2319/*
2320 * Core functions
2321 */
2322/**
2323 * cik_tiling_mode_table_init - init the hw tiling table
2324 *
2325 * @rdev: radeon_device pointer
2326 *
2327 * Starting with SI, the tiling setup is done globally in a
2328 * set of 32 tiling modes.  Rather than selecting each set of
2329 * parameters per surface as on older asics, we just select
2330 * which index in the tiling table we want to use, and the
2331 * surface uses those parameters (CIK).
2332 */
2333static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334{
2335        u32 *tile = rdev->config.cik.tile_mode_array;
2336        u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337        const u32 num_tile_mode_states =
2338                        ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339        const u32 num_secondary_tile_mode_states =
2340                        ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341        u32 reg_offset, split_equal_to_row_size;
2342        u32 num_pipe_configs;
2343        u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344                rdev->config.cik.max_shader_engines;
2345
2346        switch (rdev->config.cik.mem_row_size_in_kb) {
2347        case 1:
2348                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349                break;
2350        case 2:
2351        default:
2352                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353                break;
2354        case 4:
2355                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356                break;
2357        }
2358
2359        num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360        if (num_pipe_configs > 8)
2361                num_pipe_configs = 16;
2362
2363        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364                tile[reg_offset] = 0;
2365        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366                macrotile[reg_offset] = 0;
2367
2368        switch(num_pipe_configs) {
2369        case 16:
2370                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                           TILE_SPLIT(split_equal_to_row_size));
2390                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                           TILE_SPLIT(split_equal_to_row_size));
2401                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404                           PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412                            PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427                            PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436                tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                            PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448
2449                macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                           NUM_BANKS(ADDR_SURF_16_BANK));
2453                macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                           NUM_BANKS(ADDR_SURF_16_BANK));
2457                macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                           NUM_BANKS(ADDR_SURF_16_BANK));
2461                macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                           NUM_BANKS(ADDR_SURF_16_BANK));
2465                macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                           NUM_BANKS(ADDR_SURF_8_BANK));
2469                macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                           NUM_BANKS(ADDR_SURF_4_BANK));
2473                macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476                           NUM_BANKS(ADDR_SURF_2_BANK));
2477                macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                           NUM_BANKS(ADDR_SURF_16_BANK));
2481                macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484                           NUM_BANKS(ADDR_SURF_16_BANK));
2485                macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                            NUM_BANKS(ADDR_SURF_16_BANK));
2489                macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                            NUM_BANKS(ADDR_SURF_8_BANK));
2493                macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496                            NUM_BANKS(ADDR_SURF_4_BANK));
2497                macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500                            NUM_BANKS(ADDR_SURF_2_BANK));
2501                macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504                            NUM_BANKS(ADDR_SURF_2_BANK));
2505
2506                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510                break;
2511
2512        case 8:
2513                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                           TILE_SPLIT(split_equal_to_row_size));
2533                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                           TILE_SPLIT(split_equal_to_row_size));
2544                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                           PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579                tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591
2592                macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595                                NUM_BANKS(ADDR_SURF_16_BANK));
2596                macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599                                NUM_BANKS(ADDR_SURF_16_BANK));
2600                macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603                                NUM_BANKS(ADDR_SURF_16_BANK));
2604                macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607                                NUM_BANKS(ADDR_SURF_16_BANK));
2608                macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611                                NUM_BANKS(ADDR_SURF_8_BANK));
2612                macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615                                NUM_BANKS(ADDR_SURF_4_BANK));
2616                macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619                                NUM_BANKS(ADDR_SURF_2_BANK));
2620                macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623                                NUM_BANKS(ADDR_SURF_16_BANK));
2624                macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627                                NUM_BANKS(ADDR_SURF_16_BANK));
2628                macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631                                NUM_BANKS(ADDR_SURF_16_BANK));
2632                macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635                                NUM_BANKS(ADDR_SURF_16_BANK));
2636                macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639                                NUM_BANKS(ADDR_SURF_8_BANK));
2640                macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643                                NUM_BANKS(ADDR_SURF_4_BANK));
2644                macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647                                NUM_BANKS(ADDR_SURF_2_BANK));
2648
2649                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653                break;
2654
2655        case 4:
2656                if (num_rbs == 4) {
2657                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676                           TILE_SPLIT(split_equal_to_row_size));
2677                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                           TILE_SPLIT(split_equal_to_row_size));
2688                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689                           PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691                           PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723                tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735
2736                } else if (num_rbs < 4) {
2737                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756                           TILE_SPLIT(split_equal_to_row_size));
2757                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                           TILE_SPLIT(split_equal_to_row_size));
2768                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769                           PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803                tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                }
2816
2817                macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                NUM_BANKS(ADDR_SURF_16_BANK));
2821                macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                NUM_BANKS(ADDR_SURF_16_BANK));
2825                macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828                                NUM_BANKS(ADDR_SURF_16_BANK));
2829                macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832                                NUM_BANKS(ADDR_SURF_16_BANK));
2833                macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                NUM_BANKS(ADDR_SURF_16_BANK));
2837                macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840                                NUM_BANKS(ADDR_SURF_8_BANK));
2841                macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844                                NUM_BANKS(ADDR_SURF_4_BANK));
2845                macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848                                NUM_BANKS(ADDR_SURF_16_BANK));
2849                macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852                                NUM_BANKS(ADDR_SURF_16_BANK));
2853                macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                NUM_BANKS(ADDR_SURF_16_BANK));
2857                macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                NUM_BANKS(ADDR_SURF_16_BANK));
2861                macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864                                NUM_BANKS(ADDR_SURF_16_BANK));
2865                macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868                                NUM_BANKS(ADDR_SURF_8_BANK));
2869                macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872                                NUM_BANKS(ADDR_SURF_4_BANK));
2873
2874                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878                break;
2879
2880        case 2:
2881                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883                           PIPE_CONFIG(ADDR_SURF_P2) |
2884                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887                           PIPE_CONFIG(ADDR_SURF_P2) |
2888                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891                           PIPE_CONFIG(ADDR_SURF_P2) |
2892                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895                           PIPE_CONFIG(ADDR_SURF_P2) |
2896                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897                tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899                           PIPE_CONFIG(ADDR_SURF_P2) |
2900                           TILE_SPLIT(split_equal_to_row_size));
2901                tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902                           PIPE_CONFIG(ADDR_SURF_P2) |
2903                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                           PIPE_CONFIG(ADDR_SURF_P2) |
2907                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909                           MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                           PIPE_CONFIG(ADDR_SURF_P2) |
2911                           TILE_SPLIT(split_equal_to_row_size));
2912                tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913                           PIPE_CONFIG(ADDR_SURF_P2);
2914                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915                           MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                           PIPE_CONFIG(ADDR_SURF_P2));
2917                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919                            PIPE_CONFIG(ADDR_SURF_P2) |
2920                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923                            PIPE_CONFIG(ADDR_SURF_P2) |
2924                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927                            PIPE_CONFIG(ADDR_SURF_P2) |
2928                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                            PIPE_CONFIG(ADDR_SURF_P2) |
2931                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934                            PIPE_CONFIG(ADDR_SURF_P2) |
2935                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                            PIPE_CONFIG(ADDR_SURF_P2) |
2939                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                            MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942                            PIPE_CONFIG(ADDR_SURF_P2) |
2943                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946                            PIPE_CONFIG(ADDR_SURF_P2));
2947                tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949                            PIPE_CONFIG(ADDR_SURF_P2) |
2950                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953                            PIPE_CONFIG(ADDR_SURF_P2) |
2954                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956                            MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957                            PIPE_CONFIG(ADDR_SURF_P2) |
2958                            SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959
2960                macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                NUM_BANKS(ADDR_SURF_16_BANK));
2964                macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967                                NUM_BANKS(ADDR_SURF_16_BANK));
2968                macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                NUM_BANKS(ADDR_SURF_16_BANK));
2972                macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                NUM_BANKS(ADDR_SURF_16_BANK));
2976                macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979                                NUM_BANKS(ADDR_SURF_16_BANK));
2980                macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                NUM_BANKS(ADDR_SURF_16_BANK));
2984                macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987                                NUM_BANKS(ADDR_SURF_8_BANK));
2988                macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                NUM_BANKS(ADDR_SURF_16_BANK));
2992                macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                NUM_BANKS(ADDR_SURF_16_BANK));
2996                macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                NUM_BANKS(ADDR_SURF_16_BANK));
3000                macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                NUM_BANKS(ADDR_SURF_16_BANK));
3004                macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                NUM_BANKS(ADDR_SURF_16_BANK));
3008                macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011                                NUM_BANKS(ADDR_SURF_16_BANK));
3012                macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015                                NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021                break;
3022
3023        default:
3024                DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025        }
3026}
3027
3028/**
3029 * cik_select_se_sh - select which SE, SH to address
3030 *
3031 * @rdev: radeon_device pointer
3032 * @se_num: shader engine to address
3033 * @sh_num: sh block to address
3034 *
3035 * Select which SE, SH combinations to address. Certain
3036 * registers are instanced per SE or SH.  0xffffffff means
3037 * broadcast to all SEs or SHs (CIK).
3038 */
3039static void cik_select_se_sh(struct radeon_device *rdev,
3040                             u32 se_num, u32 sh_num)
3041{
3042        u32 data = INSTANCE_BROADCAST_WRITES;
3043
3044        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045                data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046        else if (se_num == 0xffffffff)
3047                data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048        else if (sh_num == 0xffffffff)
3049                data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050        else
3051                data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052        WREG32(GRBM_GFX_INDEX, data);
3053}
3054
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a mask with the @bit_width least significant bits set (CIK).
 * Widths of 32 or more yield a mask with all 32 bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/*
	 * Shifting a 32-bit value by 32 or more is undefined, so handle
	 * the saturated case explicitly instead of relying on the shift.
	 */
	if (bit_width >= 32)
		return 0xffffffff;

	return (1U << bit_width) - 1;
}
3073
3074/**
3075 * cik_get_rb_disabled - computes the mask of disabled RBs
3076 *
3077 * @rdev: radeon_device pointer
3078 * @max_rb_num: max RBs (render backends) for the asic
3079 * @se_num: number of SEs (shader engines) for the asic
3080 * @sh_per_se: number of SH blocks per SE for the asic
3081 *
3082 * Calculates the bitmask of disabled RBs (CIK).
3083 * Returns the disabled RB bitmask.
3084 */
3085static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086                              u32 max_rb_num_per_se,
3087                              u32 sh_per_se)
3088{
3089        u32 data, mask;
3090
3091        data = RREG32(CC_RB_BACKEND_DISABLE);
3092        if (data & 1)
3093                data &= BACKEND_DISABLE_MASK;
3094        else
3095                data = 0;
3096        data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097
3098        data >>= BACKEND_DISABLE_SHIFT;
3099
3100        mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101
3102        return data & mask;
3103}
3104
3105/**
3106 * cik_setup_rb - setup the RBs on the asic
3107 *
3108 * @rdev: radeon_device pointer
3109 * @se_num: number of SEs (shader engines) for the asic
3110 * @sh_per_se: number of SH blocks per SE for the asic
3111 * @max_rb_num: max RBs (render backends) for the asic
3112 *
3113 * Configures per-SE/SH RB registers (CIK).
3114 */
3115static void cik_setup_rb(struct radeon_device *rdev,
3116                         u32 se_num, u32 sh_per_se,
3117                         u32 max_rb_num_per_se)
3118{
3119        int i, j;
3120        u32 data, mask;
3121        u32 disabled_rbs = 0;
3122        u32 enabled_rbs = 0;
3123
3124        for (i = 0; i < se_num; i++) {
3125                for (j = 0; j < sh_per_se; j++) {
3126                        cik_select_se_sh(rdev, i, j);
3127                        data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3128                        if (rdev->family == CHIP_HAWAII)
3129                                disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3130                        else
3131                                disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3132                }
3133        }
3134        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3135
3136        mask = 1;
3137        for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3138                if (!(disabled_rbs & mask))
3139                        enabled_rbs |= mask;
3140                mask <<= 1;
3141        }
3142
3143        rdev->config.cik.backend_enable_mask = enabled_rbs;
3144
3145        for (i = 0; i < se_num; i++) {
3146                cik_select_se_sh(rdev, i, 0xffffffff);
3147                data = 0;
3148                for (j = 0; j < sh_per_se; j++) {
3149                        switch (enabled_rbs & 3) {
3150                        case 0:
3151                                if (j == 0)
3152                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3153                                else
3154                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3155                                break;
3156                        case 1:
3157                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3158                                break;
3159                        case 2:
3160                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3161                                break;
3162                        case 3:
3163                        default:
3164                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3165                                break;
3166                        }
3167                        enabled_rbs >>= 2;
3168                }
3169                WREG32(PA_SC_RASTER_CONFIG, data);
3170        }
3171        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3172}
3173
3174/**
3175 * cik_gpu_init - setup the 3D engine
3176 *
3177 * @rdev: radeon_device pointer
3178 *
3179 * Configures the 3D engine and tiling configuration
3180 * registers so that the 3D engine is usable.
3181 */
3182static void cik_gpu_init(struct radeon_device *rdev)
3183{
3184        u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3185        u32 mc_shared_chmap, mc_arb_ramcfg;
3186        u32 hdp_host_path_cntl;
3187        u32 tmp;
3188        int i, j;
3189
3190        switch (rdev->family) {
3191        case CHIP_BONAIRE:
3192                rdev->config.cik.max_shader_engines = 2;
3193                rdev->config.cik.max_tile_pipes = 4;
3194                rdev->config.cik.max_cu_per_sh = 7;
3195                rdev->config.cik.max_sh_per_se = 1;
3196                rdev->config.cik.max_backends_per_se = 2;
3197                rdev->config.cik.max_texture_channel_caches = 4;
3198                rdev->config.cik.max_gprs = 256;
3199                rdev->config.cik.max_gs_threads = 32;
3200                rdev->config.cik.max_hw_contexts = 8;
3201
3202                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3203                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3204                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3205                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3206                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3207                break;
3208        case CHIP_HAWAII:
3209                rdev->config.cik.max_shader_engines = 4;
3210                rdev->config.cik.max_tile_pipes = 16;
3211                rdev->config.cik.max_cu_per_sh = 11;
3212                rdev->config.cik.max_sh_per_se = 1;
3213                rdev->config.cik.max_backends_per_se = 4;
3214                rdev->config.cik.max_texture_channel_caches = 16;
3215                rdev->config.cik.max_gprs = 256;
3216                rdev->config.cik.max_gs_threads = 32;
3217                rdev->config.cik.max_hw_contexts = 8;
3218
3219                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3220                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3221                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3222                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3223                gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3224                break;
3225        case CHIP_KAVERI:
3226                rdev->config.cik.max_shader_engines = 1;
3227                rdev->config.cik.max_tile_pipes = 4;
3228                rdev->config.cik.max_cu_per_sh = 8;
3229                rdev->config.cik.max_backends_per_se = 2;
3230                rdev->config.cik.max_sh_per_se = 1;
3231                rdev->config.cik.max_texture_channel_caches = 4;
3232                rdev->config.cik.max_gprs = 256;
3233                rdev->config.cik.max_gs_threads = 16;
3234                rdev->config.cik.max_hw_contexts = 8;
3235
3236                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241                break;
3242        case CHIP_KABINI:
3243        case CHIP_MULLINS:
3244        default:
3245                rdev->config.cik.max_shader_engines = 1;
3246                rdev->config.cik.max_tile_pipes = 2;
3247                rdev->config.cik.max_cu_per_sh = 2;
3248                rdev->config.cik.max_sh_per_se = 1;
3249                rdev->config.cik.max_backends_per_se = 1;
3250                rdev->config.cik.max_texture_channel_caches = 2;
3251                rdev->config.cik.max_gprs = 256;
3252                rdev->config.cik.max_gs_threads = 16;
3253                rdev->config.cik.max_hw_contexts = 8;
3254
3255                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260                break;
3261        }
3262
3263        /* Initialize HDP */
3264        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3265                WREG32((0x2c14 + j), 0x00000000);
3266                WREG32((0x2c18 + j), 0x00000000);
3267                WREG32((0x2c1c + j), 0x00000000);
3268                WREG32((0x2c20 + j), 0x00000000);
3269                WREG32((0x2c24 + j), 0x00000000);
3270        }
3271
3272        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3273        WREG32(SRBM_INT_CNTL, 0x1);
3274        WREG32(SRBM_INT_ACK, 0x1);
3275
3276        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3277
3278        mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3279        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3280
3281        rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3282        rdev->config.cik.mem_max_burst_length_bytes = 256;
3283        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3284        rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3285        if (rdev->config.cik.mem_row_size_in_kb > 4)
3286                rdev->config.cik.mem_row_size_in_kb = 4;
3287        /* XXX use MC settings? */
3288        rdev->config.cik.shader_engine_tile_size = 32;
3289        rdev->config.cik.num_gpus = 1;
3290        rdev->config.cik.multi_gpu_tile_size = 64;
3291
3292        /* fix up row size */
3293        gb_addr_config &= ~ROW_SIZE_MASK;
3294        switch (rdev->config.cik.mem_row_size_in_kb) {
3295        case 1:
3296        default:
3297                gb_addr_config |= ROW_SIZE(0);
3298                break;
3299        case 2:
3300                gb_addr_config |= ROW_SIZE(1);
3301                break;
3302        case 4:
3303                gb_addr_config |= ROW_SIZE(2);
3304                break;
3305        }
3306
3307        /* setup tiling info dword.  gb_addr_config is not adequate since it does
3308         * not have bank info, so create a custom tiling dword.
3309         * bits 3:0   num_pipes
3310         * bits 7:4   num_banks
3311         * bits 11:8  group_size
3312         * bits 15:12 row_size
3313         */
3314        rdev->config.cik.tile_config = 0;
3315        switch (rdev->config.cik.num_tile_pipes) {
3316        case 1:
3317                rdev->config.cik.tile_config |= (0 << 0);
3318                break;
3319        case 2:
3320                rdev->config.cik.tile_config |= (1 << 0);
3321                break;
3322        case 4:
3323                rdev->config.cik.tile_config |= (2 << 0);
3324                break;
3325        case 8:
3326        default:
3327                /* XXX what about 12? */
3328                rdev->config.cik.tile_config |= (3 << 0);
3329                break;
3330        }
3331        rdev->config.cik.tile_config |=
3332                ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3333        rdev->config.cik.tile_config |=
3334                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3335        rdev->config.cik.tile_config |=
3336                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3337
3338        WREG32(GB_ADDR_CONFIG, gb_addr_config);
3339        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3340        WREG32(DMIF_ADDR_CALC, gb_addr_config);
3341        WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3342        WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3343        WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3344        WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3345        WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3346
3347        cik_tiling_mode_table_init(rdev);
3348
3349        cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3350                     rdev->config.cik.max_sh_per_se,
3351                     rdev->config.cik.max_backends_per_se);
3352
3353        rdev->config.cik.active_cus = 0;
3354        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3355                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3356                        rdev->config.cik.active_cus +=
3357                                hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3358                }
3359        }
3360
3361        /* set HW defaults for 3D engine */
3362        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3363
3364        WREG32(SX_DEBUG_1, 0x20);
3365
3366        WREG32(TA_CNTL_AUX, 0x00010000);
3367
3368        tmp = RREG32(SPI_CONFIG_CNTL);
3369        tmp |= 0x03000000;
3370        WREG32(SPI_CONFIG_CNTL, tmp);
3371
3372        WREG32(SQ_CONFIG, 1);
3373
3374        WREG32(DB_DEBUG, 0);
3375
3376        tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3377        tmp |= 0x00000400;
3378        WREG32(DB_DEBUG2, tmp);
3379
3380        tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3381        tmp |= 0x00020200;
3382        WREG32(DB_DEBUG3, tmp);
3383
3384        tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3385        tmp |= 0x00018208;
3386        WREG32(CB_HW_CONTROL, tmp);
3387
3388        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3389
3390        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3391                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3392                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3393                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3394
3395        WREG32(VGT_NUM_INSTANCES, 1);
3396
3397        WREG32(CP_PERFMON_CNTL, 0);
3398
3399        WREG32(SQ_CONFIG, 0);
3400
3401        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3402                                          FORCE_EOV_MAX_REZ_CNT(255)));
3403
3404        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3405               AUTO_INVLD_EN(ES_AND_GS_AUTO));
3406
3407        WREG32(VGT_GS_VERTEX_REUSE, 16);
3408        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3409
3410        tmp = RREG32(HDP_MISC_CNTL);
3411        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3412        WREG32(HDP_MISC_CNTL, tmp);
3413
3414        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3415        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3416
3417        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3418        WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3419
3420        udelay(50);
3421}
3422
3423/*
3424 * GPU scratch register helper functions.
3425 */
3426/**
3427 * cik_scratch_init - setup driver info for CP scratch regs
3428 *
3429 * @rdev: radeon_device pointer
3430 *
3431 * Set up the number and offset of the CP scratch registers.
3432 * NOTE: use of CP scratch registers is a legacy inferface and
3433 * is not used by default on newer asics (r6xx+).  On newer asics,
3434 * memory buffers are used for fences rather than scratch regs.
3435 */
3436static void cik_scratch_init(struct radeon_device *rdev)
3437{
3438        int i;
3439
3440        rdev->scratch.num_reg = 7;
3441        rdev->scratch.reg_base = SCRATCH_REG0;
3442        for (i = 0; i < rdev->scratch.num_reg; i++) {
3443                rdev->scratch.free[i] = true;
3444                rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3445        }
3446}
3447
3448/**
3449 * cik_ring_test - basic gfx ring test
3450 *
3451 * @rdev: radeon_device pointer
3452 * @ring: radeon_ring structure holding ring information
3453 *
3454 * Allocate a scratch register and write to it using the gfx ring (CIK).
3455 * Provides a basic gfx ring test to verify that the ring is working.
3456 * Used by cik_cp_gfx_resume();
3457 * Returns 0 on success, error on failure.
3458 */
3459int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3460{
3461        uint32_t scratch;
3462        uint32_t tmp = 0;
3463        unsigned i;
3464        int r;
3465
3466        r = radeon_scratch_get(rdev, &scratch);
3467        if (r) {
3468                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3469                return r;
3470        }
3471        WREG32(scratch, 0xCAFEDEAD);
3472        r = radeon_ring_lock(rdev, ring, 3);
3473        if (r) {
3474                DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3475                radeon_scratch_free(rdev, scratch);
3476                return r;
3477        }
3478        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3479        radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3480        radeon_ring_write(ring, 0xDEADBEEF);
3481        radeon_ring_unlock_commit(rdev, ring, false);
3482
3483        for (i = 0; i < rdev->usec_timeout; i++) {
3484                tmp = RREG32(scratch);
3485                if (tmp == 0xDEADBEEF)
3486                        break;
3487                udelay(1);
3488        }
3489        if (i < rdev->usec_timeout) {
3490                DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3491        } else {
3492                DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3493                          ring->idx, scratch, tmp);
3494                r = -EINVAL;
3495        }
3496        radeon_scratch_free(rdev, scratch);
3497        return r;
3498}
3499
3500/**
3501 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3502 *
3503 * @rdev: radeon_device pointer
3504 * @ridx: radeon ring index
3505 *
3506 * Emits an hdp flush on the cp.
3507 */
3508static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3509                                       int ridx)
3510{
3511        struct radeon_ring *ring = &rdev->ring[ridx];
3512        u32 ref_and_mask;
3513
3514        switch (ring->idx) {
3515        case CAYMAN_RING_TYPE_CP1_INDEX:
3516        case CAYMAN_RING_TYPE_CP2_INDEX:
3517        default:
3518                switch (ring->me) {
3519                case 0:
3520                        ref_and_mask = CP2 << ring->pipe;
3521                        break;
3522                case 1:
3523                        ref_and_mask = CP6 << ring->pipe;
3524                        break;
3525                default:
3526                        return;
3527                }
3528                break;
3529        case RADEON_RING_TYPE_GFX_INDEX:
3530                ref_and_mask = CP0;
3531                break;
3532        }
3533
3534        radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3535        radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3536                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3537                                 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3538        radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3539        radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3540        radeon_ring_write(ring, ref_and_mask);
3541        radeon_ring_write(ring, ref_and_mask);
3542        radeon_ring_write(ring, 0x20); /* poll interval */
3543}
3544
3545/**
3546 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3547 *
3548 * @rdev: radeon_device pointer
3549 * @fence: radeon fence object
3550 *
3551 * Emits a fence sequnce number on the gfx ring and flushes
3552 * GPU caches.
3553 */
3554void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3555                             struct radeon_fence *fence)
3556{
3557        struct radeon_ring *ring = &rdev->ring[fence->ring];
3558        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3559
3560        /* Workaround for cache flush problems. First send a dummy EOP
3561         * event down the pipe with seq one below.
3562         */
3563        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3564        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3565                                 EOP_TC_ACTION_EN |
3566                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3567                                 EVENT_INDEX(5)));
3568        radeon_ring_write(ring, addr & 0xfffffffc);
3569        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3570                                DATA_SEL(1) | INT_SEL(0));
3571        radeon_ring_write(ring, fence->seq - 1);
3572        radeon_ring_write(ring, 0);
3573
3574        /* Then send the real EOP event down the pipe. */
3575        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3576        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3577                                 EOP_TC_ACTION_EN |
3578                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3579                                 EVENT_INDEX(5)));
3580        radeon_ring_write(ring, addr & 0xfffffffc);
3581        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3582        radeon_ring_write(ring, fence->seq);
3583        radeon_ring_write(ring, 0);
3584}
3585
3586/**
3587 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3588 *
3589 * @rdev: radeon_device pointer
3590 * @fence: radeon fence object
3591 *
3592 * Emits a fence sequnce number on the compute ring and flushes
3593 * GPU caches.
3594 */
3595void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3596                                 struct radeon_fence *fence)
3597{
3598        struct radeon_ring *ring = &rdev->ring[fence->ring];
3599        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3600
3601        /* RELEASE_MEM - flush caches, send int */
3602        radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3603        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3604                                 EOP_TC_ACTION_EN |
3605                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3606                                 EVENT_INDEX(5)));
3607        radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3608        radeon_ring_write(ring, addr & 0xfffffffc);
3609        radeon_ring_write(ring, upper_32_bits(addr));
3610        radeon_ring_write(ring, fence->seq);
3611        radeon_ring_write(ring, 0);
3612}
3613
3614/**
3615 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3616 *
3617 * @rdev: radeon_device pointer
3618 * @ring: radeon ring buffer object
3619 * @semaphore: radeon semaphore object
3620 * @emit_wait: Is this a sempahore wait?
3621 *
3622 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3623 * from running ahead of semaphore waits.
3624 */
3625bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3626                             struct radeon_ring *ring,
3627                             struct radeon_semaphore *semaphore,
3628                             bool emit_wait)
3629{
3630        uint64_t addr = semaphore->gpu_addr;
3631        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3632
3633        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3634        radeon_ring_write(ring, lower_32_bits(addr));
3635        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3636
3637        if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3638                /* Prevent the PFP from running ahead of the semaphore wait */
3639                radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3640                radeon_ring_write(ring, 0x0);
3641        }
3642
3643        return true;
3644}
3645
3646/**
3647 * cik_copy_cpdma - copy pages using the CP DMA engine
3648 *
3649 * @rdev: radeon_device pointer
3650 * @src_offset: src GPU address
3651 * @dst_offset: dst GPU address
3652 * @num_gpu_pages: number of GPU pages to xfer
3653 * @resv: reservation object to sync to
3654 *
3655 * Copy GPU paging using the CP DMA engine (CIK+).
3656 * Used by the radeon ttm implementation to move pages if
3657 * registered as the asic copy callback.
3658 */
3659struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3660                                    uint64_t src_offset, uint64_t dst_offset,
3661                                    unsigned num_gpu_pages,
3662                                    struct reservation_object *resv)
3663{
3664        struct radeon_fence *fence;
3665        struct radeon_sync sync;
3666        int ring_index = rdev->asic->copy.blit_ring_index;
3667        struct radeon_ring *ring = &rdev->ring[ring_index];
3668        u32 size_in_bytes, cur_size_in_bytes, control;
3669        int i, num_loops;
3670        int r = 0;
3671
3672        radeon_sync_create(&sync);
3673
3674        size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3675        num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3676        r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3677        if (r) {
3678                DRM_ERROR("radeon: moving bo (%d).\n", r);
3679                radeon_sync_free(rdev, &sync, NULL);
3680                return ERR_PTR(r);
3681        }
3682
3683        radeon_sync_resv(rdev, &sync, resv, false);
3684        radeon_sync_rings(rdev, &sync, ring->idx);
3685
3686        for (i = 0; i < num_loops; i++) {
3687                cur_size_in_bytes = size_in_bytes;
3688                if (cur_size_in_bytes > 0x1fffff)
3689                        cur_size_in_bytes = 0x1fffff;
3690                size_in_bytes -= cur_size_in_bytes;
3691                control = 0;
3692                if (size_in_bytes == 0)
3693                        control |= PACKET3_DMA_DATA_CP_SYNC;
3694                radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3695                radeon_ring_write(ring, control);
3696                radeon_ring_write(ring, lower_32_bits(src_offset));
3697                radeon_ring_write(ring, upper_32_bits(src_offset));
3698                radeon_ring_write(ring, lower_32_bits(dst_offset));
3699                radeon_ring_write(ring, upper_32_bits(dst_offset));
3700                radeon_ring_write(ring, cur_size_in_bytes);
3701                src_offset += cur_size_in_bytes;
3702                dst_offset += cur_size_in_bytes;
3703        }
3704
3705        r = radeon_fence_emit(rdev, &fence, ring->idx);
3706        if (r) {
3707                radeon_ring_unlock_undo(rdev, ring);
3708                radeon_sync_free(rdev, &sync, NULL);
3709                return ERR_PTR(r);
3710        }
3711
3712        radeon_ring_unlock_commit(rdev, ring, false);
3713        radeon_sync_free(rdev, &sync, fence);
3714
3715        return fence;
3716}
3717
3718/*
3719 * IB stuff
3720 */
3721/**
3722 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3723 *
3724 * @rdev: radeon_device pointer
3725 * @ib: radeon indirect buffer object
3726 *
3727 * Emits a DE (drawing engine) or CE (constant engine) IB
3728 * on the gfx ring.  IBs are usually generated by userspace
3729 * acceleration drivers and submitted to the kernel for
3730 * scheduling on the ring.  This function schedules the IB
3731 * on the gfx ring for execution by the GPU.
3732 */
3733void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3734{
3735        struct radeon_ring *ring = &rdev->ring[ib->ring];
3736        unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3737        u32 header, control = INDIRECT_BUFFER_VALID;
3738
3739        if (ib->is_const_ib) {
3740                /* set switch buffer packet before const IB */
3741                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3742                radeon_ring_write(ring, 0);
3743
3744                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3745        } else {
3746                u32 next_rptr;
3747                if (ring->rptr_save_reg) {
3748                        next_rptr = ring->wptr + 3 + 4;
3749                        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3750                        radeon_ring_write(ring, ((ring->rptr_save_reg -
3751                                                  PACKET3_SET_UCONFIG_REG_START) >> 2));
3752                        radeon_ring_write(ring, next_rptr);
3753                } else if (rdev->wb.enabled) {
3754                        next_rptr = ring->wptr + 5 + 4;
3755                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3756                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3757                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3758                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3759                        radeon_ring_write(ring, next_rptr);
3760                }
3761
3762                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3763        }
3764
3765        control |= ib->length_dw | (vm_id << 24);
3766
3767        radeon_ring_write(ring, header);
3768        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3769        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3770        radeon_ring_write(ring, control);
3771}
3772
3773/**
3774 * cik_ib_test - basic gfx ring IB test
3775 *
3776 * @rdev: radeon_device pointer
3777 * @ring: radeon_ring structure holding ring information
3778 *
3779 * Allocate an IB and execute it on the gfx ring (CIK).
3780 * Provides a basic gfx ring test to verify that IBs are working.
3781 * Returns 0 on success, error on failure.
3782 */
3783int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3784{
3785        struct radeon_ib ib;
3786        uint32_t scratch;
3787        uint32_t tmp = 0;
3788        unsigned i;
3789        int r;
3790
3791        r = radeon_scratch_get(rdev, &scratch);
3792        if (r) {
3793                DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3794                return r;
3795        }
3796        WREG32(scratch, 0xCAFEDEAD);
3797        r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3798        if (r) {
3799                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3800                radeon_scratch_free(rdev, scratch);
3801                return r;
3802        }
3803        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3804        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3805        ib.ptr[2] = 0xDEADBEEF;
3806        ib.length_dw = 3;
3807        r = radeon_ib_schedule(rdev, &ib, NULL, false);
3808        if (r) {
3809                radeon_scratch_free(rdev, scratch);
3810                radeon_ib_free(rdev, &ib);
3811                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3812                return r;
3813        }
3814        r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3815                RADEON_USEC_IB_TEST_TIMEOUT));
3816        if (r < 0) {
3817                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3818                radeon_scratch_free(rdev, scratch);
3819                radeon_ib_free(rdev, &ib);
3820                return r;
3821        } else if (r == 0) {
3822                DRM_ERROR("radeon: fence wait timed out.\n");
3823                radeon_scratch_free(rdev, scratch);
3824                radeon_ib_free(rdev, &ib);
3825                return -ETIMEDOUT;
3826        }
3827        r = 0;
3828        for (i = 0; i < rdev->usec_timeout; i++) {
3829                tmp = RREG32(scratch);
3830                if (tmp == 0xDEADBEEF)
3831                        break;
3832                udelay(1);
3833        }
3834        if (i < rdev->usec_timeout) {
3835                DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3836        } else {
3837                DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3838                          scratch, tmp);
3839                r = -EINVAL;
3840        }
3841        radeon_scratch_free(rdev, scratch);
3842        radeon_ib_free(rdev, &ib);
3843        return r;
3844}
3845
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
3869/**
3870 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3871 *
3872 * @rdev: radeon_device pointer
3873 * @enable: enable or disable the MEs
3874 *
3875 * Halts or unhalts the gfx MEs.
3876 */
3877static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3878{
3879        if (enable)
3880                WREG32(CP_ME_CNTL, 0);
3881        else {
3882                if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3883                        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3884                WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3885                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3886        }
3887        udelay(50);
3888}
3889
3890/**
3891 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3892 *
3893 * @rdev: radeon_device pointer
3894 *
3895 * Loads the gfx PFP, ME, and CE ucode.
3896 * Returns 0 for success, -EINVAL if the ucode is not available.
3897 */
3898static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3899{
3900        int i;
3901
3902        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3903                return -EINVAL;
3904
3905        cik_cp_gfx_enable(rdev, false);
3906
3907        if (rdev->new_fw) {
3908                const struct gfx_firmware_header_v1_0 *pfp_hdr =
3909                        (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3910                const struct gfx_firmware_header_v1_0 *ce_hdr =
3911                        (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3912                const struct gfx_firmware_header_v1_0 *me_hdr =
3913                        (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3914                const __le32 *fw_data;
3915                u32 fw_size;
3916
3917                radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3918                radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3919                radeon_ucode_print_gfx_hdr(&me_hdr->header);
3920
3921                /* PFP */
3922                fw_data = (const __le32 *)
3923                        (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3924                fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3925                WREG32(CP_PFP_UCODE_ADDR, 0);
3926                for (i = 0; i < fw_size; i++)
3927                        WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3928                WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3929
3930                /* CE */
3931                fw_data = (const __le32 *)
3932                        (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3933                fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3934                WREG32(CP_CE_UCODE_ADDR, 0);
3935                for (i = 0; i < fw_size; i++)
3936                        WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3937                WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3938
3939                /* ME */
3940                fw_data = (const __be32 *)
3941                        (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3942                fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3943                WREG32(CP_ME_RAM_WADDR, 0);
3944                for (i = 0; i < fw_size; i++)
3945                        WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3946                WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3947                WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3948        } else {
3949                const __be32 *fw_data;
3950
3951                /* PFP */
3952                fw_data = (const __be32 *)rdev->pfp_fw->data;
3953                WREG32(CP_PFP_UCODE_ADDR, 0);
3954                for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3955                        WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3956                WREG32(CP_PFP_UCODE_ADDR, 0);
3957
3958                /* CE */
3959                fw_data = (const __be32 *)rdev->ce_fw->data;
3960                WREG32(CP_CE_UCODE_ADDR, 0);
3961                for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3962                        WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3963                WREG32(CP_CE_UCODE_ADDR, 0);
3964
3965                /* ME */
3966                fw_data = (const __be32 *)rdev->me_fw->data;
3967                WREG32(CP_ME_RAM_WADDR, 0);
3968                for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3969                        WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3970                WREG32(CP_ME_RAM_WADDR, 0);
3971        }
3972
3973        return 0;
3974}
3975
3976/**
3977 * cik_cp_gfx_start - start the gfx ring
3978 *
3979 * @rdev: radeon_device pointer
3980 *
3981 * Enables the ring and loads the clear state context and other
3982 * packets required to init the ring.
3983 * Returns 0 for success, error for failure.
3984 */
3985static int cik_cp_gfx_start(struct radeon_device *rdev)
3986{
3987        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3988        int r, i;
3989
3990        /* init the CP */
3991        WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3992        WREG32(CP_ENDIAN_SWAP, 0);
3993        WREG32(CP_DEVICE_ID, 1);
3994
3995        cik_cp_gfx_enable(rdev, true);
3996
3997        r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3998        if (r) {
3999                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4000                return r;
4001        }
4002
4003        /* init the CE partitions.  CE only used for gfx on CIK */
4004        radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4005        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4006        radeon_ring_write(ring, 0x8000);
4007        radeon_ring_write(ring, 0x8000);
4008
4009        /* setup clear context state */
4010        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4011        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4012
4013        radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4014        radeon_ring_write(ring, 0x80000000);
4015        radeon_ring_write(ring, 0x80000000);
4016
4017        for (i = 0; i < cik_default_size; i++)
4018                radeon_ring_write(ring, cik_default_state[i]);
4019
4020        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4021        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4022
4023        /* set clear context state */
4024        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4025        radeon_ring_write(ring, 0);
4026
4027        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4028        radeon_ring_write(ring, 0x00000316);
4029        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4030        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4031
4032        radeon_ring_unlock_commit(rdev, ring, false);
4033
4034        return 0;
4035}
4036
4037/**
4038 * cik_cp_gfx_fini - stop the gfx ring
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Stop the gfx ring and tear down the driver ring
4043 * info.
4044 */
4045static void cik_cp_gfx_fini(struct radeon_device *rdev)
4046{
4047        cik_cp_gfx_enable(rdev, false);
4048        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4049}
4050
4051/**
4052 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4053 *
4054 * @rdev: radeon_device pointer
4055 *
4056 * Program the location and size of the gfx ring buffer
4057 * and test it to make sure it's working.
4058 * Returns 0 for success, error for failure.
4059 */
4060static int cik_cp_gfx_resume(struct radeon_device *rdev)
4061{
4062        struct radeon_ring *ring;
4063        u32 tmp;
4064        u32 rb_bufsz;
4065        u64 rb_addr;
4066        int r;
4067
4068        WREG32(CP_SEM_WAIT_TIMER, 0x0);
4069        if (rdev->family != CHIP_HAWAII)
4070                WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4071
4072        /* Set the write pointer delay */
4073        WREG32(CP_RB_WPTR_DELAY, 0);
4074
4075        /* set the RB to use vmid 0 */
4076        WREG32(CP_RB_VMID, 0);
4077
4078        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4079
4080        /* ring 0 - compute and gfx */
4081        /* Set ring buffer size */
4082        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4083        rb_bufsz = order_base_2(ring->ring_size / 8);
4084        tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4085#ifdef __BIG_ENDIAN
4086        tmp |= BUF_SWAP_32BIT;
4087#endif
4088        WREG32(CP_RB0_CNTL, tmp);
4089
4090        /* Initialize the ring buffer's read and write pointers */
4091        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4092        ring->wptr = 0;
4093        WREG32(CP_RB0_WPTR, ring->wptr);
4094
4095        /* set the wb address wether it's enabled or not */
4096        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4097        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4098
4099        /* scratch register shadowing is no longer supported */
4100        WREG32(SCRATCH_UMSK, 0);
4101
4102        if (!rdev->wb.enabled)
4103                tmp |= RB_NO_UPDATE;
4104
4105        mdelay(1);
4106        WREG32(CP_RB0_CNTL, tmp);
4107
4108        rb_addr = ring->gpu_addr >> 8;
4109        WREG32(CP_RB0_BASE, rb_addr);
4110        WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4111
4112        /* start the ring */
4113        cik_cp_gfx_start(rdev);
4114        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4115        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4116        if (r) {
4117                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4118                return r;
4119        }
4120
4121        if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4122                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4123
4124        return 0;
4125}
4126
4127u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4128                     struct radeon_ring *ring)
4129{
4130        u32 rptr;
4131
4132        if (rdev->wb.enabled)
4133                rptr = rdev->wb.wb[ring->rptr_offs/4];
4134        else
4135                rptr = RREG32(CP_RB0_RPTR);
4136
4137        return rptr;
4138}
4139
4140u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4141                     struct radeon_ring *ring)
4142{
4143        return RREG32(CP_RB0_WPTR);
4144}
4145
4146void cik_gfx_set_wptr(struct radeon_device *rdev,
4147                      struct radeon_ring *ring)
4148{
4149        WREG32(CP_RB0_WPTR, ring->wptr);
4150        (void)RREG32(CP_RB0_WPTR);
4151}
4152
4153u32 cik_compute_get_rptr(struct radeon_device *rdev,
4154                         struct radeon_ring *ring)
4155{
4156        u32 rptr;
4157
4158        if (rdev->wb.enabled) {
4159                rptr = rdev->wb.wb[ring->rptr_offs/4];
4160        } else {
4161                mutex_lock(&rdev->srbm_mutex);
4162                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4163                rptr = RREG32(CP_HQD_PQ_RPTR);
4164                cik_srbm_select(rdev, 0, 0, 0, 0);
4165                mutex_unlock(&rdev->srbm_mutex);
4166        }
4167
4168        return rptr;
4169}
4170
4171u32 cik_compute_get_wptr(struct radeon_device *rdev,
4172                         struct radeon_ring *ring)
4173{
4174        u32 wptr;
4175
4176        if (rdev->wb.enabled) {
4177                /* XXX check if swapping is necessary on BE */
4178                wptr = rdev->wb.wb[ring->wptr_offs/4];
4179        } else {
4180                mutex_lock(&rdev->srbm_mutex);
4181                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4182                wptr = RREG32(CP_HQD_PQ_WPTR);
4183                cik_srbm_select(rdev, 0, 0, 0, 0);
4184                mutex_unlock(&rdev->srbm_mutex);
4185        }
4186
4187        return wptr;
4188}
4189
4190void cik_compute_set_wptr(struct radeon_device *rdev,
4191                          struct radeon_ring *ring)
4192{
4193        /* XXX check if swapping is necessary on BE */
4194        rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4195        WDOORBELL32(ring->doorbell_index, ring->wptr);
4196}
4197
4198static void cik_compute_stop(struct radeon_device *rdev,
4199                             struct radeon_ring *ring)
4200{
4201        u32 j, tmp;
4202
4203        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4204        /* Disable wptr polling. */
4205        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4206        tmp &= ~WPTR_POLL_EN;
4207        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4208        /* Disable HQD. */
4209        if (RREG32(CP_HQD_ACTIVE) & 1) {
4210                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4211                for (j = 0; j < rdev->usec_timeout; j++) {
4212                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
4213                                break;
4214                        udelay(1);
4215                }
4216                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4217                WREG32(CP_HQD_PQ_RPTR, 0);
4218                WREG32(CP_HQD_PQ_WPTR, 0);
4219        }
4220        cik_srbm_select(rdev, 0, 0, 0, 0);
4221}
4222
4223/**
4224 * cik_cp_compute_enable - enable/disable the compute CP MEs
4225 *
4226 * @rdev: radeon_device pointer
4227 * @enable: enable or disable the MEs
4228 *
4229 * Halts or unhalts the compute MEs.
4230 */
4231static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232{
4233        if (enable)
4234                WREG32(CP_MEC_CNTL, 0);
4235        else {
4236                /*
4237                 * To make hibernation reliable we need to clear compute ring
4238                 * configuration before halting the compute ring.
4239                 */
4240                mutex_lock(&rdev->srbm_mutex);
4241                cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4242                cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4243                mutex_unlock(&rdev->srbm_mutex);
4244
4245                WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4246                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4247                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4248        }
4249        udelay(50);
4250}
4251
4252/**
4253 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4254 *
4255 * @rdev: radeon_device pointer
4256 *
4257 * Loads the compute MEC1&2 ucode.
4258 * Returns 0 for success, -EINVAL if the ucode is not available.
4259 */
4260static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4261{
4262        int i;
4263
4264        if (!rdev->mec_fw)
4265                return -EINVAL;
4266
4267        cik_cp_compute_enable(rdev, false);
4268
4269        if (rdev->new_fw) {
4270                const struct gfx_firmware_header_v1_0 *mec_hdr =
4271                        (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4272                const __le32 *fw_data;
4273                u32 fw_size;
4274
4275                radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4276
4277                /* MEC1 */
4278                fw_data = (const __le32 *)
4279                        (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4280                fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4281                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4282                for (i = 0; i < fw_size; i++)
4283                        WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4284                WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4285
4286                /* MEC2 */
4287                if (rdev->family == CHIP_KAVERI) {
4288                        const struct gfx_firmware_header_v1_0 *mec2_hdr =
4289                                (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4290
4291                        fw_data = (const __le32 *)
4292                                (rdev->mec2_fw->data +
4293                                 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4294                        fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4295                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4296                        for (i = 0; i < fw_size; i++)
4297                                WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4298                        WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4299                }
4300        } else {
4301                const __be32 *fw_data;
4302
4303                /* MEC1 */
4304                fw_data = (const __be32 *)rdev->mec_fw->data;
4305                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4306                for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4307                        WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4308                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4309
4310                if (rdev->family == CHIP_KAVERI) {
4311                        /* MEC2 */
4312                        fw_data = (const __be32 *)rdev->mec_fw->data;
4313                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4314                        for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4315                                WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4316                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4317                }
4318        }
4319
4320        return 0;
4321}
4322
4323/**
4324 * cik_cp_compute_start - start the compute queues
4325 *
4326 * @rdev: radeon_device pointer
4327 *
4328 * Enable the compute queues.
4329 * Returns 0 for success, error for failure.
4330 */
4331static int cik_cp_compute_start(struct radeon_device *rdev)
4332{
4333        cik_cp_compute_enable(rdev, true);
4334
4335        return 0;
4336}
4337
4338/**
4339 * cik_cp_compute_fini - stop the compute queues
4340 *
4341 * @rdev: radeon_device pointer
4342 *
4343 * Stop the compute queues and tear down the driver queue
4344 * info.
4345 */
4346static void cik_cp_compute_fini(struct radeon_device *rdev)
4347{
4348        int i, idx, r;
4349
4350        cik_cp_compute_enable(rdev, false);
4351
4352        for (i = 0; i < 2; i++) {
4353                if (i == 0)
4354                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
4355                else
4356                        idx = CAYMAN_RING_TYPE_CP2_INDEX;
4357
4358                if (rdev->ring[idx].mqd_obj) {
4359                        r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4360                        if (unlikely(r != 0))
4361                                dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4362
4363                        radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4364                        radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4365
4366                        radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4367                        rdev->ring[idx].mqd_obj = NULL;
4368                }
4369        }
4370}
4371
4372static void cik_mec_fini(struct radeon_device *rdev)
4373{
4374        int r;
4375
4376        if (rdev->mec.hpd_eop_obj) {
4377                r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4378                if (unlikely(r != 0))
4379                        dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4380                radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4381                radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4382
4383                radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4384                rdev->mec.hpd_eop_obj = NULL;
4385        }
4386}
4387
4388#define MEC_HPD_SIZE 2048
4389
4390static int cik_mec_init(struct radeon_device *rdev)
4391{
4392        int r;
4393        u32 *hpd;
4394
4395        /*
4396         * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4397         * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4398         */
4399        if (rdev->family == CHIP_KAVERI)
4400                rdev->mec.num_mec = 2;
4401        else
4402                rdev->mec.num_mec = 1;
4403        rdev->mec.num_pipe = 4;
4404        rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4405
4406        if (rdev->mec.hpd_eop_obj == NULL) {
4407                r = radeon_bo_create(rdev,
4408                                     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4409                                     PAGE_SIZE, true,
4410                                     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4411                                     &rdev->mec.hpd_eop_obj);
4412                if (r) {
4413                        dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4414                        return r;
4415                }
4416        }
4417
4418        r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4419        if (unlikely(r != 0)) {
4420                cik_mec_fini(rdev);
4421                return r;
4422        }
4423        r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4424                          &rdev->mec.hpd_eop_gpu_addr);
4425        if (r) {
4426                dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4427                cik_mec_fini(rdev);
4428                return r;
4429        }
4430        r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4431        if (r) {
4432                dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4433                cik_mec_fini(rdev);
4434                return r;
4435        }
4436
4437        /* clear memory.  Not sure if this is required or not */
4438        memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4439
4440        radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4441        radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4442
4443        return 0;
4444}
4445
4446struct hqd_registers
4447{
4448        u32 cp_mqd_base_addr;
4449        u32 cp_mqd_base_addr_hi;
4450        u32 cp_hqd_active;
4451        u32 cp_hqd_vmid;
4452        u32 cp_hqd_persistent_state;
4453        u32 cp_hqd_pipe_priority;
4454        u32 cp_hqd_queue_priority;
4455        u32 cp_hqd_quantum;
4456        u32 cp_hqd_pq_base;
4457        u32 cp_hqd_pq_base_hi;
4458        u32 cp_hqd_pq_rptr;
4459        u32 cp_hqd_pq_rptr_report_addr;
4460        u32 cp_hqd_pq_rptr_report_addr_hi;
4461        u32 cp_hqd_pq_wptr_poll_addr;
4462        u32 cp_hqd_pq_wptr_poll_addr_hi;
4463        u32 cp_hqd_pq_doorbell_control;
4464        u32 cp_hqd_pq_wptr;
4465        u32 cp_hqd_pq_control;
4466        u32 cp_hqd_ib_base_addr;
4467        u32 cp_hqd_ib_base_addr_hi;
4468        u32 cp_hqd_ib_rptr;
4469        u32 cp_hqd_ib_control;
4470        u32 cp_hqd_iq_timer;
4471        u32 cp_hqd_iq_rptr;
4472        u32 cp_hqd_dequeue_request;
4473        u32 cp_hqd_dma_offload;
4474        u32 cp_hqd_sema_cmd;
4475        u32 cp_hqd_msg_type;
4476        u32 cp_hqd_atomic0_preop_lo;
4477        u32 cp_hqd_atomic0_preop_hi;
4478        u32 cp_hqd_atomic1_preop_lo;
4479        u32 cp_hqd_atomic1_preop_hi;
4480        u32 cp_hqd_hq_scheduler0;
4481        u32 cp_hqd_hq_scheduler1;
4482        u32 cp_mqd_control;
4483};
4484
4485struct bonaire_mqd
4486{
4487        u32 header;
4488        u32 dispatch_initiator;
4489        u32 dimensions[3];
4490        u32 start_idx[3];
4491        u32 num_threads[3];
4492        u32 pipeline_stat_enable;
4493        u32 perf_counter_enable;
4494        u32 pgm[2];
4495        u32 tba[2];
4496        u32 tma[2];
4497        u32 pgm_rsrc[2];
4498        u32 vmid;
4499        u32 resource_limits;
4500        u32 static_thread_mgmt01[2];
4501        u32 tmp_ring_size;
4502        u32 static_thread_mgmt23[2];
4503        u32 restart[3];
4504        u32 thread_trace_enable;
4505        u32 reserved1;
4506        u32 user_data[16];
4507        u32 vgtcs_invoke_count[2];
4508        struct hqd_registers queue_state;
4509        u32 dequeue_cntr;
4510        u32 interrupt_queue[64];
4511};
4512
4513/**
4514 * cik_cp_compute_resume - setup the compute queue registers
4515 *
4516 * @rdev: radeon_device pointer
4517 *
4518 * Program the compute queues and test them to make sure they
4519 * are working.
4520 * Returns 0 for success, error for failure.
4521 */
4522static int cik_cp_compute_resume(struct radeon_device *rdev)
4523{
4524        int r, i, j, idx;
4525        u32 tmp;
4526        bool use_doorbell = true;
4527        u64 hqd_gpu_addr;
4528        u64 mqd_gpu_addr;
4529        u64 eop_gpu_addr;
4530        u64 wb_gpu_addr;
4531        u32 *buf;
4532        struct bonaire_mqd *mqd;
4533
4534        r = cik_cp_compute_start(rdev);
4535        if (r)
4536                return r;
4537
4538        /* fix up chicken bits */
4539        tmp = RREG32(CP_CPF_DEBUG);
4540        tmp |= (1 << 23);
4541        WREG32(CP_CPF_DEBUG, tmp);
4542
4543        /* init the pipes */
4544        mutex_lock(&rdev->srbm_mutex);
4545
4546        for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4547                int me = (i < 4) ? 1 : 2;
4548                int pipe = (i < 4) ? i : (i - 4);
4549
4550                cik_srbm_select(rdev, me, pipe, 0, 0);
4551
4552                eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4553                /* write the EOP addr */
4554                WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4555                WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4556
4557                /* set the VMID assigned */
4558                WREG32(CP_HPD_EOP_VMID, 0);
4559
4560                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4561                tmp = RREG32(CP_HPD_EOP_CONTROL);
4562                tmp &= ~EOP_SIZE_MASK;
4563                tmp |= order_base_2(MEC_HPD_SIZE / 8);
4564                WREG32(CP_HPD_EOP_CONTROL, tmp);
4565
4566        }
4567        cik_srbm_select(rdev, 0, 0, 0, 0);
4568        mutex_unlock(&rdev->srbm_mutex);
4569
4570        /* init the queues.  Just two for now. */
4571        for (i = 0; i < 2; i++) {
4572                if (i == 0)
4573                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
4574                else
4575                        idx = CAYMAN_RING_TYPE_CP2_INDEX;
4576
4577                if (rdev->ring[idx].mqd_obj == NULL) {
4578                        r = radeon_bo_create(rdev,
4579                                             sizeof(struct bonaire_mqd),
4580                                             PAGE_SIZE, true,
4581                                             RADEON_GEM_DOMAIN_GTT, 0, NULL,
4582                                             NULL, &rdev->ring[idx].mqd_obj);
4583                        if (r) {
4584                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4585                                return r;
4586                        }
4587                }
4588
4589                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4590                if (unlikely(r != 0)) {
4591                        cik_cp_compute_fini(rdev);
4592                        return r;
4593                }
4594                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4595                                  &mqd_gpu_addr);
4596                if (r) {
4597                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4598                        cik_cp_compute_fini(rdev);
4599                        return r;
4600                }
4601                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4602                if (r) {
4603                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4604                        cik_cp_compute_fini(rdev);
4605                        return r;
4606                }
4607
4608                /* init the mqd struct */
4609                memset(buf, 0, sizeof(struct bonaire_mqd));
4610
4611                mqd = (struct bonaire_mqd *)buf;
4612                mqd->header = 0xC0310800;
4613                mqd->static_thread_mgmt01[0] = 0xffffffff;
4614                mqd->static_thread_mgmt01[1] = 0xffffffff;
4615                mqd->static_thread_mgmt23[0] = 0xffffffff;
4616                mqd->static_thread_mgmt23[1] = 0xffffffff;
4617
4618                mutex_lock(&rdev->srbm_mutex);
4619                cik_srbm_select(rdev, rdev->ring[idx].me,
4620                                rdev->ring[idx].pipe,
4621                                rdev->ring[idx].queue, 0);
4622
4623                /* disable wptr polling */
4624                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4625                tmp &= ~WPTR_POLL_EN;
4626                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4627
4628                /* enable doorbell? */
4629                mqd->queue_state.cp_hqd_pq_doorbell_control =
4630                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4631                if (use_doorbell)
4632                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4633                else
4634                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4635                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4636                       mqd->queue_state.cp_hqd_pq_doorbell_control);
4637
4638                /* disable the queue if it's active */
4639                mqd->queue_state.cp_hqd_dequeue_request = 0;
4640                mqd->queue_state.cp_hqd_pq_rptr = 0;
4641                mqd->queue_state.cp_hqd_pq_wptr= 0;
4642                if (RREG32(CP_HQD_ACTIVE) & 1) {
4643                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4644                        for (j = 0; j < rdev->usec_timeout; j++) {
4645                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
4646                                        break;
4647                                udelay(1);
4648                        }
4649                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4650                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4651                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4652                }
4653
4654                /* set the pointer to the MQD */
4655                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4656                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4657                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4658                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4659                /* set MQD vmid to 0 */
4660                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4661                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4662                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4663
4664                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4665                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4666                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4667                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4668                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4669                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4670
4671                /* set up the HQD, this is similar to CP_RB0_CNTL */
4672                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4673                mqd->queue_state.cp_hqd_pq_control &=
4674                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4675
4676                mqd->queue_state.cp_hqd_pq_control |=
4677                        order_base_2(rdev->ring[idx].ring_size / 8);
4678                mqd->queue_state.cp_hqd_pq_control |=
4679                        (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4680#ifdef __BIG_ENDIAN
4681                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4682#endif
4683                mqd->queue_state.cp_hqd_pq_control &=
4684                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4685                mqd->queue_state.cp_hqd_pq_control |=
4686                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4687                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4688
4689                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4690                if (i == 0)
4691                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4692                else
4693                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4694                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4695                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4696                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4697                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4698                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4699
4700                /* set the wb address wether it's enabled or not */
4701                if (i == 0)
4702                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4703                else
4704                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4705                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4706                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4707                        upper_32_bits(wb_gpu_addr) & 0xffff;
4708                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4709                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4710                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4711                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4712
4713                /* enable the doorbell if requested */
4714                if (use_doorbell) {
4715                        mqd->queue_state.cp_hqd_pq_doorbell_control =
4716                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4717                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4718                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
4719                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4720                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4721                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
4722                                ~(DOORBELL_SOURCE | DOORBELL_HIT);
4723
4724                } else {
4725                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4726                }
4727                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4728                       mqd->queue_state.cp_hqd_pq_doorbell_control);
4729
4730                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4731                rdev->ring[idx].wptr = 0;
4732                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4733                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4734                mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4735
4736                /* set the vmid for the queue */
4737                mqd->queue_state.cp_hqd_vmid = 0;
4738                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4739
4740                /* activate the queue */
4741                mqd->queue_state.cp_hqd_active = 1;
4742                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4743
4744                cik_srbm_select(rdev, 0, 0, 0, 0);
4745                mutex_unlock(&rdev->srbm_mutex);
4746
4747                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4748                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749
4750                rdev->ring[idx].ready = true;
4751                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4752                if (r)
4753                        rdev->ring[idx].ready = false;
4754        }
4755
4756        return 0;
4757}
4758
4759static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4760{
4761        cik_cp_gfx_enable(rdev, enable);
4762        cik_cp_compute_enable(rdev, enable);
4763}
4764
/*
 * cik_cp_load_microcode - load the gfx and compute CP ucode
 *
 * Loads the gfx microcode first; the compute microcode is only attempted
 * when that succeeded.  Returns 0 for success, or the first error.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int err = cik_cp_gfx_load_microcode(rdev);

	if (err)
		return err;

	return cik_cp_compute_load_microcode(rdev);
}
4778
/*
 * cik_cp_fini - tear down both command processors
 *
 * Runs the gfx CP teardown followed by the compute CP teardown.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4784
4785static int cik_cp_resume(struct radeon_device *rdev)
4786{
4787        int r;
4788
4789        cik_enable_gui_idle_interrupt(rdev, false);
4790
4791        r = cik_cp_load_microcode(rdev);
4792        if (r)
4793                return r;
4794
4795        r = cik_cp_gfx_resume(rdev);
4796        if (r)
4797                return r;
4798        r = cik_cp_compute_resume(rdev);
4799        if (r)
4800                return r;
4801
4802        cik_enable_gui_idle_interrupt(rdev, true);
4803
4804        return 0;
4805}
4806
4807static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4808{
4809        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4810                RREG32(GRBM_STATUS));
4811        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4812                RREG32(GRBM_STATUS2));
4813        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4814                RREG32(GRBM_STATUS_SE0));
4815        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4816                RREG32(GRBM_STATUS_SE1));
4817        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4818                RREG32(GRBM_STATUS_SE2));
4819        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4820                RREG32(GRBM_STATUS_SE3));
4821        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4822                RREG32(SRBM_STATUS));
4823        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4824                RREG32(SRBM_STATUS2));
4825        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4826                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4827        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4828                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4829        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4830        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4831                 RREG32(CP_STALLED_STAT1));
4832        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4833                 RREG32(CP_STALLED_STAT2));
4834        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4835                 RREG32(CP_STALLED_STAT3));
4836        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4837                 RREG32(CP_CPF_BUSY_STAT));
4838        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4839                 RREG32(CP_CPF_STALLED_STAT1));
4840        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4841        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4842        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4843                 RREG32(CP_CPC_STALLED_STAT1));
4844        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4845}
4846
4847/**
4848 * cik_gpu_check_soft_reset - check which blocks are busy
4849 *
4850 * @rdev: radeon_device pointer
4851 *
4852 * Check which blocks are busy and return the relevant reset
4853 * mask to be used by cik_gpu_soft_reset().
4854 * Returns a mask of the blocks to be reset.
4855 */
4856u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4857{
4858        u32 reset_mask = 0;
4859        u32 tmp;
4860
4861        /* GRBM_STATUS */
4862        tmp = RREG32(GRBM_STATUS);
4863        if (tmp & (PA_BUSY | SC_BUSY |
4864                   BCI_BUSY | SX_BUSY |
4865                   TA_BUSY | VGT_BUSY |
4866                   DB_BUSY | CB_BUSY |
4867                   GDS_BUSY | SPI_BUSY |
4868                   IA_BUSY | IA_BUSY_NO_DMA))
4869                reset_mask |= RADEON_RESET_GFX;
4870
4871        if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4872                reset_mask |= RADEON_RESET_CP;
4873
4874        /* GRBM_STATUS2 */
4875        tmp = RREG32(GRBM_STATUS2);
4876        if (tmp & RLC_BUSY)
4877                reset_mask |= RADEON_RESET_RLC;
4878
4879        /* SDMA0_STATUS_REG */
4880        tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4881        if (!(tmp & SDMA_IDLE))
4882                reset_mask |= RADEON_RESET_DMA;
4883
4884        /* SDMA1_STATUS_REG */
4885        tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4886        if (!(tmp & SDMA_IDLE))
4887                reset_mask |= RADEON_RESET_DMA1;
4888
4889        /* SRBM_STATUS2 */
4890        tmp = RREG32(SRBM_STATUS2);
4891        if (tmp & SDMA_BUSY)
4892                reset_mask |= RADEON_RESET_DMA;
4893
4894        if (tmp & SDMA1_BUSY)
4895                reset_mask |= RADEON_RESET_DMA1;
4896
4897        /* SRBM_STATUS */
4898        tmp = RREG32(SRBM_STATUS);
4899
4900        if (tmp & IH_BUSY)
4901                reset_mask |= RADEON_RESET_IH;
4902
4903        if (tmp & SEM_BUSY)
4904                reset_mask |= RADEON_RESET_SEM;
4905
4906        if (tmp & GRBM_RQ_PENDING)
4907                reset_mask |= RADEON_RESET_GRBM;
4908
4909        if (tmp & VMC_BUSY)
4910                reset_mask |= RADEON_RESET_VMC;
4911
4912        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4913                   MCC_BUSY | MCD_BUSY))
4914                reset_mask |= RADEON_RESET_MC;
4915
4916        if (evergreen_is_display_hung(rdev))
4917                reset_mask |= RADEON_RESET_DISPLAY;
4918
4919        /* Skip MC reset as it's mostly likely not hung, just busy */
4920        if (reset_mask & RADEON_RESET_MC) {
4921                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4922                reset_mask &= ~RADEON_RESET_MC;
4923        }
4924
4925        return reset_mask;
4926}
4927
4928/**
4929 * cik_gpu_soft_reset - soft reset GPU
4930 *
4931 * @rdev: radeon_device pointer
4932 * @reset_mask: mask of which blocks to reset
4933 *
4934 * Soft reset the blocks specified in @reset_mask.
4935 */
4936static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4937{
4938        struct evergreen_mc_save save;
4939        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4940        u32 tmp;
4941
4942        if (reset_mask == 0)
4943                return;
4944
4945        dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4946
4947        cik_print_gpu_status_regs(rdev);
4948        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4949                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4950        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4951                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4952
4953        /* disable CG/PG */
4954        cik_fini_pg(rdev);
4955        cik_fini_cg(rdev);
4956
4957        /* stop the rlc */
4958        cik_rlc_stop(rdev);
4959
4960        /* Disable GFX parsing/prefetching */
4961        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4962
4963        /* Disable MEC parsing/prefetching */
4964        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4965
4966        if (reset_mask & RADEON_RESET_DMA) {
4967                /* sdma0 */
4968                tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4969                tmp |= SDMA_HALT;
4970                WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4971        }
4972        if (reset_mask & RADEON_RESET_DMA1) {
4973                /* sdma1 */
4974                tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4975                tmp |= SDMA_HALT;
4976                WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4977        }
4978
4979        evergreen_mc_stop(rdev, &save);
4980        if (evergreen_mc_wait_for_idle(rdev)) {
4981                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4982        }
4983
4984        if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4985                grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4986
4987        if (reset_mask & RADEON_RESET_CP) {
4988                grbm_soft_reset |= SOFT_RESET_CP;
4989
4990                srbm_soft_reset |= SOFT_RESET_GRBM;
4991        }
4992
4993        if (reset_mask & RADEON_RESET_DMA)
4994                srbm_soft_reset |= SOFT_RESET_SDMA;
4995
4996        if (reset_mask & RADEON_RESET_DMA1)
4997                srbm_soft_reset |= SOFT_RESET_SDMA1;
4998
4999        if (reset_mask & RADEON_RESET_DISPLAY)
5000                srbm_soft_reset |= SOFT_RESET_DC;
5001
5002        if (reset_mask & RADEON_RESET_RLC)
5003                grbm_soft_reset |= SOFT_RESET_RLC;
5004
5005        if (reset_mask & RADEON_RESET_SEM)
5006                srbm_soft_reset |= SOFT_RESET_SEM;
5007
5008        if (reset_mask & RADEON_RESET_IH)
5009                srbm_soft_reset |= SOFT_RESET_IH;
5010
5011        if (reset_mask & RADEON_RESET_GRBM)
5012                srbm_soft_reset |= SOFT_RESET_GRBM;
5013
5014        if (reset_mask & RADEON_RESET_VMC)
5015                srbm_soft_reset |= SOFT_RESET_VMC;
5016
5017        if (!(rdev->flags & RADEON_IS_IGP)) {
5018                if (reset_mask & RADEON_RESET_MC)
5019                        srbm_soft_reset |= SOFT_RESET_MC;
5020        }
5021
5022        if (grbm_soft_reset) {
5023                tmp = RREG32(GRBM_SOFT_RESET);
5024                tmp |= grbm_soft_reset;
5025                dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5026                WREG32(GRBM_SOFT_RESET, tmp);
5027                tmp = RREG32(GRBM_SOFT_RESET);
5028
5029                udelay(50);
5030
5031                tmp &= ~grbm_soft_reset;
5032                WREG32(GRBM_SOFT_RESET, tmp);
5033                tmp = RREG32(GRBM_SOFT_RESET);
5034        }
5035
5036        if (srbm_soft_reset) {
5037                tmp = RREG32(SRBM_SOFT_RESET);
5038                tmp |= srbm_soft_reset;
5039                dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5040                WREG32(SRBM_SOFT_RESET, tmp);
5041                tmp = RREG32(SRBM_SOFT_RESET);
5042
5043                udelay(50);
5044
5045                tmp &= ~srbm_soft_reset;
5046                WREG32(SRBM_SOFT_RESET, tmp);
5047                tmp = RREG32(SRBM_SOFT_RESET);
5048        }
5049
5050        /* Wait a little for things to settle down */
5051        udelay(50);
5052
5053        evergreen_mc_resume(rdev, &save);
5054        udelay(50);
5055
5056        cik_print_gpu_status_regs(rdev);
5057}
5058
/*
 * GMCON register values captured by kv_save_regs_for_reset() and written
 * back by kv_restore_regs_for_reset().
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5064
5065static void kv_save_regs_for_reset(struct radeon_device *rdev,
5066                                   struct kv_reset_save_regs *save)
5067{
5068        save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5069        save->gmcon_misc = RREG32(GMCON_MISC);
5070        save->gmcon_misc3 = RREG32(GMCON_MISC3);
5071
5072        WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5073        WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5074                                                STCTRL_STUTTER_EN));
5075}
5076
5077static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5078                                      struct kv_reset_save_regs *save)
5079{
5080        int i;
5081
5082        WREG32(GMCON_PGFSM_WRITE, 0);
5083        WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5084
5085        for (i = 0; i < 5; i++)
5086                WREG32(GMCON_PGFSM_WRITE, 0);
5087
5088        WREG32(GMCON_PGFSM_WRITE, 0);
5089        WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5090
5091        for (i = 0; i < 5; i++)
5092                WREG32(GMCON_PGFSM_WRITE, 0);
5093
5094        WREG32(GMCON_PGFSM_WRITE, 0x210000);
5095        WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5096
5097        for (i = 0; i < 5; i++)
5098                WREG32(GMCON_PGFSM_WRITE, 0);
5099
5100        WREG32(GMCON_PGFSM_WRITE, 0x21003);
5101        WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5102
5103        for (i = 0; i < 5; i++)
5104                WREG32(GMCON_PGFSM_WRITE, 0);
5105
5106        WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5107        WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5108
5109        for (i = 0; i < 5; i++)
5110                WREG32(GMCON_PGFSM_WRITE, 0);
5111
5112        WREG32(GMCON_PGFSM_WRITE, 0);
5113        WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5114
5115        for (i = 0; i < 5; i++)
5116                WREG32(GMCON_PGFSM_WRITE, 0);
5117
5118        WREG32(GMCON_PGFSM_WRITE, 0x420000);
5119        WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5120
5121        for (i = 0; i < 5; i++)
5122                WREG32(GMCON_PGFSM_WRITE, 0);
5123
5124        WREG32(GMCON_PGFSM_WRITE, 0x120202);
5125        WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5126
5127        for (i = 0; i < 5; i++)
5128                WREG32(GMCON_PGFSM_WRITE, 0);
5129
5130        WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5131        WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5132
5133        for (i = 0; i < 5; i++)
5134                WREG32(GMCON_PGFSM_WRITE, 0);
5135
5136        WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5137        WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5138
5139        for (i = 0; i < 5; i++)
5140                WREG32(GMCON_PGFSM_WRITE, 0);
5141
5142        WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5143        WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5144
5145        WREG32(GMCON_MISC3, save->gmcon_misc3);
5146        WREG32(GMCON_MISC, save->gmcon_misc);
5147        WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5148}
5149
5150static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5151{
5152        struct evergreen_mc_save save;
5153        struct kv_reset_save_regs kv_save = { 0 };
5154        u32 tmp, i;
5155
5156        dev_info(rdev->dev, "GPU pci config reset\n");
5157
5158        /* disable dpm? */
5159
5160        /* disable cg/pg */
5161        cik_fini_pg(rdev);
5162        cik_fini_cg(rdev);
5163
5164        /* Disable GFX parsing/prefetching */
5165        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5166
5167        /* Disable MEC parsing/prefetching */
5168        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5169
5170        /* sdma0 */
5171        tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5172        tmp |= SDMA_HALT;
5173        WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5174        /* sdma1 */
5175        tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5176        tmp |= SDMA_HALT;
5177        WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5178        /* XXX other engines? */
5179
5180        /* halt the rlc, disable cp internal ints */
5181        cik_rlc_stop(rdev);
5182
5183        udelay(50);
5184
5185        /* disable mem access */
5186        evergreen_mc_stop(rdev, &save);
5187        if (evergreen_mc_wait_for_idle(rdev)) {
5188                dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5189        }
5190
5191        if (rdev->flags & RADEON_IS_IGP)
5192                kv_save_regs_for_reset(rdev, &kv_save);
5193
5194        /* disable BM */
5195        pci_clear_master(rdev->pdev);
5196        /* reset */
5197        radeon_pci_config_reset(rdev);
5198
5199        udelay(100);
5200
5201        /* wait for asic to come out of reset */
5202        for (i = 0; i < rdev->usec_timeout; i++) {
5203                if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5204                        break;
5205                udelay(1);
5206        }
5207
5208        /* does asic init need to be run first??? */
5209        if (rdev->flags & RADEON_IS_IGP)
5210                kv_restore_regs_for_reset(rdev, &kv_save);
5211}
5212
5213/**
5214 * cik_asic_reset - soft reset GPU
5215 *
5216 * @rdev: radeon_device pointer
5217 * @hard: force hard reset
5218 *
5219 * Look up which blocks are hung and attempt
5220 * to reset them.
5221 * Returns 0 for success.
5222 */
5223int cik_asic_reset(struct radeon_device *rdev, bool hard)
5224{
5225        u32 reset_mask;
5226
5227        if (hard) {
5228                cik_gpu_pci_config_reset(rdev);
5229                return 0;
5230        }
5231
5232        reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234        if (reset_mask)
5235                r600_set_bios_scratch_engine_hung(rdev, true);
5236
5237        /* try soft reset */
5238        cik_gpu_soft_reset(rdev, reset_mask);
5239
5240        reset_mask = cik_gpu_check_soft_reset(rdev);
5241
5242        /* try pci config reset */
5243        if (reset_mask && radeon_hard_reset)
5244                cik_gpu_pci_config_reset(rdev);
5245
5246        reset_mask = cik_gpu_check_soft_reset(rdev);
5247
5248        if (!reset_mask)
5249                r600_set_bios_scratch_engine_hung(rdev, false);
5250
5251        return 0;
5252}
5253
5254/**
5255 * cik_gfx_is_lockup - check if the 3D engine is locked up
5256 *
5257 * @rdev: radeon_device pointer
5258 * @ring: radeon_ring structure holding ring information
5259 *
5260 * Check if the 3D engine is locked up (CIK).
5261 * Returns true if the engine is locked, false if not.
5262 */
5263bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5264{
5265        u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5266
5267        if (!(reset_mask & (RADEON_RESET_GFX |
5268                            RADEON_RESET_COMPUTE |
5269                            RADEON_RESET_CP))) {
5270                radeon_ring_lockup_update(rdev, ring);
5271                return false;
5272        }
5273        return radeon_ring_test_lockup(rdev, ring);
5274}
5275
5276/* MC */
5277/**
5278 * cik_mc_program - program the GPU memory controller
5279 *
5280 * @rdev: radeon_device pointer
5281 *
5282 * Set the location of vram, gart, and AGP in the GPU's
5283 * physical address space (CIK).
5284 */
5285static void cik_mc_program(struct radeon_device *rdev)
5286{
5287        struct evergreen_mc_save save;
5288        u32 tmp;
5289        int i, j;
5290
5291        /* Initialize HDP */
5292        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5293                WREG32((0x2c14 + j), 0x00000000);
5294                WREG32((0x2c18 + j), 0x00000000);
5295                WREG32((0x2c1c + j), 0x00000000);
5296                WREG32((0x2c20 + j), 0x00000000);
5297                WREG32((0x2c24 + j), 0x00000000);
5298        }
5299        WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5300
5301        evergreen_mc_stop(rdev, &save);
5302        if (radeon_mc_wait_for_idle(rdev)) {
5303                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5304        }
5305        /* Lockout access through VGA aperture*/
5306        WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5307        /* Update configuration */
5308        WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5309               rdev->mc.vram_start >> 12);
5310        WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5311               rdev->mc.vram_end >> 12);
5312        WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5313               rdev->vram_scratch.gpu_addr >> 12);
5314        tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5315        tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5316        WREG32(MC_VM_FB_LOCATION, tmp);
5317        /* XXX double check these! */
5318        WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5319        WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5320        WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5321        WREG32(MC_VM_AGP_BASE, 0);
5322        WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5323        WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5324        if (radeon_mc_wait_for_idle(rdev)) {
5325                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5326        }
5327        evergreen_mc_resume(rdev, &save);
5328        /* we need to own VRAM, so turn off the VGA renderer here
5329         * to stop it overwriting our objects */
5330        rv515_vga_render_disable(rdev);
5331}
5332
5333/**
5334 * cik_mc_init - initialize the memory controller driver params
5335 *
5336 * @rdev: radeon_device pointer
5337 *
5338 * Look up the amount of vram, vram width, and decide how to place
5339 * vram and gart within the GPU's physical address space (CIK).
5340 * Returns 0 for success.
5341 */
5342static int cik_mc_init(struct radeon_device *rdev)
5343{
5344        u32 tmp;
5345        int chansize, numchan;
5346
5347        /* Get VRAM informations */
5348        rdev->mc.vram_is_ddr = true;
5349        tmp = RREG32(MC_ARB_RAMCFG);
5350        if (tmp & CHANSIZE_MASK) {
5351                chansize = 64;
5352        } else {
5353                chansize = 32;
5354        }
5355        tmp = RREG32(MC_SHARED_CHMAP);
5356        switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5357        case 0:
5358        default:
5359                numchan = 1;
5360                break;
5361        case 1:
5362                numchan = 2;
5363                break;
5364        case 2:
5365                numchan = 4;
5366                break;
5367        case 3:
5368                numchan = 8;
5369                break;
5370        case 4:
5371                numchan = 3;
5372                break;
5373        case 5:
5374                numchan = 6;
5375                break;
5376        case 6:
5377                numchan = 10;
5378                break;
5379        case 7:
5380                numchan = 12;
5381                break;
5382        case 8:
5383                numchan = 16;
5384                break;
5385        }
5386        rdev->mc.vram_width = numchan * chansize;
5387        /* Could aper size report 0 ? */
5388        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5389        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5390        /* size in MB on si */
5391        rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5392        rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5393        rdev->mc.visible_vram_size = rdev->mc.aper_size;
5394        si_vram_gtt_location(rdev, &rdev->mc);
5395        radeon_update_bandwidth_info(rdev);
5396
5397        return 0;
5398}
5399
5400/*
5401 * GART
5402 * VMID 0 is the physical GPU addresses as used by the kernel.
5403 * VMIDs 1-15 are used for userspace clients and are handled
5404 * by the radeon vm/hsa code.
5405 */
5406/**
5407 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5408 *
5409 * @rdev: radeon_device pointer
5410 *
5411 * Flush the TLB for the VMID 0 page table (CIK).
5412 */
5413void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5414{
5415        /* flush hdp cache */
5416        WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5417
5418        /* bits 0-15 are the VM contexts0-15 */
5419        WREG32(VM_INVALIDATE_REQUEST, 0x1);
5420}
5421
5422/**
5423 * cik_pcie_gart_enable - gart enable
5424 *
5425 * @rdev: radeon_device pointer
5426 *
5427 * This sets up the TLBs, programs the page tables for VMID0,
5428 * sets up the hw for VMIDs 1-15 which are allocated on
5429 * demand, and sets up the global locations for the LDS, GDS,
5430 * and GPUVM for FSA64 clients (CIK).
5431 * Returns 0 for success, errors for failure.
5432 */
5433static int cik_pcie_gart_enable(struct radeon_device *rdev)
5434{
5435        int r, i;
5436
5437        if (rdev->gart.robj == NULL) {
5438                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5439                return -EINVAL;
5440        }
5441        r = radeon_gart_table_vram_pin(rdev);
5442        if (r)
5443                return r;
5444        /* Setup TLB control */
5445        WREG32(MC_VM_MX_L1_TLB_CNTL,
5446               (0xA << 7) |
5447               ENABLE_L1_TLB |
5448               ENABLE_L1_FRAGMENT_PROCESSING |
5449               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5450               ENABLE_ADVANCED_DRIVER_MODEL |
5451               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5452        /* Setup L2 cache */
5453        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5454               ENABLE_L2_FRAGMENT_PROCESSING |
5455               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5456               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5457               EFFECTIVE_L2_QUEUE_SIZE(7) |
5458               CONTEXT1_IDENTITY_ACCESS_MODE(1));
5459        WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5460        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5461               BANK_SELECT(4) |
5462               L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5463        /* setup context0 */
5464        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5465        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5466        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5467        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5468                        (u32)(rdev->dummy_page.addr >> 12));
5469        WREG32(VM_CONTEXT0_CNTL2, 0);
5470        WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5471                                  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5472
5473        WREG32(0x15D4, 0);
5474        WREG32(0x15D8, 0);
5475        WREG32(0x15DC, 0);
5476
5477        /* restore context1-15 */
5478        /* set vm size, must be a multiple of 4 */
5479        WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5480        WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5481        for (i = 1; i < 16; i++) {
5482                if (i < 8)
5483                        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5484                               rdev->vm_manager.saved_table_addr[i]);
5485                else
5486                        WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5487                               rdev->vm_manager.saved_table_addr[i]);
5488        }
5489
5490        /* enable context1-15 */
5491        WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5492               (u32)(rdev->dummy_page.addr >> 12));
5493        WREG32(VM_CONTEXT1_CNTL2, 4);
5494        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5495                                PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5496                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5498                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5500                                PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501                                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5502                                VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503                                VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5504                                READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5505                                READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5506                                WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5507                                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5508
5509        if (rdev->family == CHIP_KAVERI) {
5510                u32 tmp = RREG32(CHUB_CONTROL);
5511                tmp &= ~BYPASS_VM;
5512                WREG32(CHUB_CONTROL, tmp);
5513        }
5514
5515        /* XXX SH_MEM regs */
5516        /* where to put LDS, scratch, GPUVM in FSA64 space */
5517        mutex_lock(&rdev->srbm_mutex);
5518        for (i = 0; i < 16; i++) {
5519                cik_srbm_select(rdev, 0, 0, 0, i);
5520                /* CP and shaders */
5521                WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5522                WREG32(SH_MEM_APE1_BASE, 1);
5523                WREG32(SH_MEM_APE1_LIMIT, 0);
5524                WREG32(SH_MEM_BASES, 0);
5525                /* SDMA GFX */
5526                WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5527                WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5528                WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5529                WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5530                /* XXX SDMA RLC - todo */
5531        }
5532        cik_srbm_select(rdev, 0, 0, 0, 0);
5533        mutex_unlock(&rdev->srbm_mutex);
5534
5535        cik_pcie_gart_tlb_flush(rdev);
5536        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5537                 (unsigned)(rdev->mc.gtt_size >> 20),
5538                 (unsigned long long)rdev->gart.table_addr);
5539        rdev->gart.ready = true;
5540        return 0;
5541}
5542
5543/**
5544 * cik_pcie_gart_disable - gart disable
5545 *
5546 * @rdev: radeon_device pointer
5547 *
5548 * This disables all VM page table (CIK).
5549 */
5550static void cik_pcie_gart_disable(struct radeon_device *rdev)
5551{
5552        unsigned i;
5553
5554        for (i = 1; i < 16; ++i) {
5555                uint32_t reg;
5556                if (i < 8)
5557                        reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5558                else
5559                        reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5560                rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5561        }
5562
5563        /* Disable all tables */
5564        WREG32(VM_CONTEXT0_CNTL, 0);
5565        WREG32(VM_CONTEXT1_CNTL, 0);
5566        /* Setup TLB control */
5567        WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5568               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5569        /* Setup L2 cache */
5570        WREG32(VM_L2_CNTL,
5571               ENABLE_L2_FRAGMENT_PROCESSING |
5572               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5573               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5574               EFFECTIVE_L2_QUEUE_SIZE(7) |
5575               CONTEXT1_IDENTITY_ACCESS_MODE(1));
5576        WREG32(VM_L2_CNTL2, 0);
5577        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5578               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5579        radeon_gart_table_vram_unpin(rdev);
5580}
5581
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): the GART is disabled,
 * its VRAM table is freed, and the common GART state is finalized.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable first, then free the table and the common GART state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5595
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software */
	return 0;
}
5609
5610/*
5611 * vm
5612 * VMID 0 is the physical GPU addresses as used by the kernel.
5613 * VMIDs 1-15 are used for userspace clients and are handled
5614 * by the radeon vm/hsa code.
5615 */
5616/**
5617 * cik_vm_init - cik vm init callback
5618 *
5619 * @rdev: radeon_device pointer
5620 *
5621 * Inits cik specific vm parameters (number of VMs, base of vram for
5622 * VMIDs 1-15) (CIK).
5623 * Returns 0 for success.
5624 */
5625int cik_vm_init(struct radeon_device *rdev)
5626{
5627        /*
5628         * number of VMs
5629         * VMID 0 is reserved for System
5630         * radeon graphics/compute will use VMIDs 1-15
5631         */
5632        rdev->vm_manager.nvm = 16;
5633        /* base offset of vram pages */
5634        if (rdev->flags & RADEON_IS_IGP) {
5635                u64 tmp = RREG32(MC_VM_FB_OFFSET);
5636                tmp <<= 22;
5637                rdev->vm_manager.vram_base_offset = tmp;
5638        } else
5639                rdev->vm_manager.vram_base_offset = 0;
5640
5641        return 0;
5642}
5643
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Tear down any asic specific VM setup (CIK).
 * Currently there is nothing to undo, so this is empty.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
5654
5655/**
5656 * cik_vm_decode_fault - print human readable fault info
5657 *
5658 * @rdev: radeon_device pointer
5659 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5660 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5661 *
5662 * Print human readable fault information (CIK).
5663 */
5664static void cik_vm_decode_fault(struct radeon_device *rdev,
5665                                u32 status, u32 addr, u32 mc_client)
5666{
5667        u32 mc_id;
5668        u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5669        u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5670        char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5671                (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5672
5673        if (rdev->family == CHIP_HAWAII)
5674                mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5675        else
5676                mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5677
5678        printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5679               protections, vmid, addr,
5680               (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5681               block, mc_client, mc_id);
5682}
5683
5684/**
5685 * cik_vm_flush - cik vm flush using the CP
5686 *
5687 * @rdev: radeon_device pointer
5688 *
5689 * Update the page table base and flush the VM TLB
5690 * using the CP (CIK).
5691 */
5692void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5693                  unsigned vm_id, uint64_t pd_addr)
5694{
5695        int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5696
5697        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5698        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5699                                 WRITE_DATA_DST_SEL(0)));
5700        if (vm_id < 8) {
5701                radeon_ring_write(ring,
5702                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5703        } else {
5704                radeon_ring_write(ring,
5705                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5706        }
5707        radeon_ring_write(ring, 0);
5708        radeon_ring_write(ring, pd_addr >> 12);
5709
5710        /* update SH_MEM_* regs */
5711        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5712        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5713                                 WRITE_DATA_DST_SEL(0)));
5714        radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5715        radeon_ring_write(ring, 0);
5716        radeon_ring_write(ring, VMID(vm_id));
5717
5718        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5719        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5720                                 WRITE_DATA_DST_SEL(0)));
5721        radeon_ring_write(ring, SH_MEM_BASES >> 2);
5722        radeon_ring_write(ring, 0);
5723
5724        radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5725        radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5726        radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5727        radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5728
5729        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5730        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5731                                 WRITE_DATA_DST_SEL(0)));
5732        radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5733        radeon_ring_write(ring, 0);
5734        radeon_ring_write(ring, VMID(0));
5735
5736        /* HDP flush */
5737        cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5738
5739        /* bits 0-15 are the VM contexts0-15 */
5740        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5741        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5742                                 WRITE_DATA_DST_SEL(0)));
5743        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5744        radeon_ring_write(ring, 0);
5745        radeon_ring_write(ring, 1 << vm_id);
5746
5747        /* wait for the invalidate to complete */
5748        radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5749        radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5750                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5751                                 WAIT_REG_MEM_ENGINE(0))); /* me */
5752        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5753        radeon_ring_write(ring, 0);
5754        radeon_ring_write(ring, 0); /* ref */
5755        radeon_ring_write(ring, 0); /* mask */
5756        radeon_ring_write(ring, 0x20); /* poll interval */
5757
5758        /* compute doesn't have PFP */
5759        if (usepfp) {
5760                /* sync PFP to ME, otherwise we might get invalid PFP reads */
5761                radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5762                radeon_ring_write(ring, 0x0);
5763        }
5764}
5765
5766/*
5767 * RLC
5768 * The RLC is a multi-purpose microengine that handles a
5769 * variety of functions, the most important of which is
5770 * the interrupt controller.
5771 */
5772static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5773                                          bool enable)
5774{
5775        u32 tmp = RREG32(CP_INT_CNTL_RING0);
5776
5777        if (enable)
5778                tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5779        else
5780                tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5781        WREG32(CP_INT_CNTL_RING0, tmp);
5782}
5783
5784static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5785{
5786        u32 tmp;
5787
5788        tmp = RREG32(RLC_LB_CNTL);
5789        if (enable)
5790                tmp |= LOAD_BALANCE_ENABLE;
5791        else
5792                tmp &= ~LOAD_BALANCE_ENABLE;
5793        WREG32(RLC_LB_CNTL, tmp);
5794}
5795
5796static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5797{
5798        u32 i, j, k;
5799        u32 mask;
5800
5801        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5802                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5803                        cik_select_se_sh(rdev, i, j);
5804                        for (k = 0; k < rdev->usec_timeout; k++) {
5805                                if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5806                                        break;
5807                                udelay(1);
5808                        }
5809                }
5810        }
5811        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5812
5813        mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5814        for (k = 0; k < rdev->usec_timeout; k++) {
5815                if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5816                        break;
5817                udelay(1);
5818        }
5819}
5820
5821static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5822{
5823        u32 tmp;
5824
5825        tmp = RREG32(RLC_CNTL);
5826        if (tmp != rlc)
5827                WREG32(RLC_CNTL, rlc);
5828}
5829
5830static u32 cik_halt_rlc(struct radeon_device *rdev)
5831{
5832        u32 data, orig;
5833
5834        orig = data = RREG32(RLC_CNTL);
5835
5836        if (data & RLC_ENABLE) {
5837                u32 i;
5838
5839                data &= ~RLC_ENABLE;
5840                WREG32(RLC_CNTL, data);
5841
5842                for (i = 0; i < rdev->usec_timeout; i++) {
5843                        if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5844                                break;
5845                        udelay(1);
5846                }
5847
5848                cik_wait_for_rlc_serdes(rdev);
5849        }
5850
5851        return orig;
5852}
5853
5854void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5855{
5856        u32 tmp, i, mask;
5857
5858        tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5859        WREG32(RLC_GPR_REG2, tmp);
5860
5861        mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5862        for (i = 0; i < rdev->usec_timeout; i++) {
5863                if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5864                        break;
5865                udelay(1);
5866        }
5867
5868        for (i = 0; i < rdev->usec_timeout; i++) {
5869                if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5870                        break;
5871                udelay(1);
5872        }
5873}
5874
5875void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5876{
5877        u32 tmp;
5878
5879        tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5880        WREG32(RLC_GPR_REG2, tmp);
5881}
5882
5883/**
5884 * cik_rlc_stop - stop the RLC ME
5885 *
5886 * @rdev: radeon_device pointer
5887 *
5888 * Halt the RLC ME (MicroEngine) (CIK).
5889 */
5890static void cik_rlc_stop(struct radeon_device *rdev)
5891{
5892        WREG32(RLC_CNTL, 0);
5893
5894        cik_enable_gui_idle_interrupt(rdev, false);
5895
5896        cik_wait_for_rlc_serdes(rdev);
5897}
5898
5899/**
5900 * cik_rlc_start - start the RLC ME
5901 *
5902 * @rdev: radeon_device pointer
5903 *
5904 * Unhalt the RLC ME (MicroEngine) (CIK).
5905 */
5906static void cik_rlc_start(struct radeon_device *rdev)
5907{
5908        WREG32(RLC_CNTL, RLC_ENABLE);
5909
5910        cik_enable_gui_idle_interrupt(rdev, true);
5911
5912        udelay(50);
5913}
5914
5915/**
5916 * cik_rlc_resume - setup the RLC hw
5917 *
5918 * @rdev: radeon_device pointer
5919 *
5920 * Initialize the RLC registers, load the ucode,
5921 * and start the RLC (CIK).
5922 * Returns 0 for success, -EINVAL if the ucode is not available.
5923 */
5924static int cik_rlc_resume(struct radeon_device *rdev)
5925{
5926        u32 i, size, tmp;
5927
5928        if (!rdev->rlc_fw)
5929                return -EINVAL;
5930
5931        cik_rlc_stop(rdev);
5932
5933        /* disable CG */
5934        tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5935        WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5936
5937        si_rlc_reset(rdev);
5938
5939        cik_init_pg(rdev);
5940
5941        cik_init_cg(rdev);
5942
5943        WREG32(RLC_LB_CNTR_INIT, 0);
5944        WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5945
5946        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5947        WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5948        WREG32(RLC_LB_PARAMS, 0x00600408);
5949        WREG32(RLC_LB_CNTL, 0x80000004);
5950
5951        WREG32(RLC_MC_CNTL, 0);
5952        WREG32(RLC_UCODE_CNTL, 0);
5953
5954        if (rdev->new_fw) {
5955                const struct rlc_firmware_header_v1_0 *hdr =
5956                        (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5957                const __le32 *fw_data = (const __le32 *)
5958                        (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5959
5960                radeon_ucode_print_rlc_hdr(&hdr->header);
5961
5962                size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5963                WREG32(RLC_GPM_UCODE_ADDR, 0);
5964                for (i = 0; i < size; i++)
5965                        WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5966                WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5967        } else {
5968                const __be32 *fw_data;
5969
5970                switch (rdev->family) {
5971                case CHIP_BONAIRE:
5972                case CHIP_HAWAII:
5973                default:
5974                        size = BONAIRE_RLC_UCODE_SIZE;
5975                        break;
5976                case CHIP_KAVERI:
5977                        size = KV_RLC_UCODE_SIZE;
5978                        break;
5979                case CHIP_KABINI:
5980                        size = KB_RLC_UCODE_SIZE;
5981                        break;
5982                case CHIP_MULLINS:
5983                        size = ML_RLC_UCODE_SIZE;
5984                        break;
5985                }
5986
5987                fw_data = (const __be32 *)rdev->rlc_fw->data;
5988                WREG32(RLC_GPM_UCODE_ADDR, 0);
5989                for (i = 0; i < size; i++)
5990                        WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5991                WREG32(RLC_GPM_UCODE_ADDR, 0);
5992        }
5993
5994        /* XXX - find out what chips support lbpw */
5995        cik_enable_lbpw(rdev, false);
5996
5997        if (rdev->family == CHIP_BONAIRE)
5998                WREG32(RLC_DRIVER_DMA_STATUS, 0);
5999
6000        cik_rlc_start(rdev);
6001
6002        return 0;
6003}
6004
6005static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6006{
6007        u32 data, orig, tmp, tmp2;
6008
6009        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6010
6011        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6012                cik_enable_gui_idle_interrupt(rdev, true);
6013
6014                tmp = cik_halt_rlc(rdev);
6015
6016                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6017                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6018                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6019                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6020                WREG32(RLC_SERDES_WR_CTRL, tmp2);
6021
6022                cik_update_rlc(rdev, tmp);
6023
6024                data |= CGCG_EN | CGLS_EN;
6025        } else {
6026                cik_enable_gui_idle_interrupt(rdev, false);
6027
6028                RREG32(CB_CGTT_SCLK_CTRL);
6029                RREG32(CB_CGTT_SCLK_CTRL);
6030                RREG32(CB_CGTT_SCLK_CTRL);
6031                RREG32(CB_CGTT_SCLK_CTRL);
6032
6033                data &= ~(CGCG_EN | CGLS_EN);
6034        }
6035
6036        if (orig != data)
6037                WREG32(RLC_CGCG_CGLS_CTRL, data);
6038
6039}
6040
6041static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6042{
6043        u32 data, orig, tmp = 0;
6044
6045        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6046                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6047                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6048                                orig = data = RREG32(CP_MEM_SLP_CNTL);
6049                                data |= CP_MEM_LS_EN;
6050                                if (orig != data)
6051                                        WREG32(CP_MEM_SLP_CNTL, data);
6052                        }
6053                }
6054
6055                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6056                data |= 0x00000001;
6057                data &= 0xfffffffd;
6058                if (orig != data)
6059                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6060
6061                tmp = cik_halt_rlc(rdev);
6062
6063                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6064                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6065                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6066                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6067                WREG32(RLC_SERDES_WR_CTRL, data);
6068
6069                cik_update_rlc(rdev, tmp);
6070
6071                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6072                        orig = data = RREG32(CGTS_SM_CTRL_REG);
6073                        data &= ~SM_MODE_MASK;
6074                        data |= SM_MODE(0x2);
6075                        data |= SM_MODE_ENABLE;
6076                        data &= ~CGTS_OVERRIDE;
6077                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6078                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6079                                data &= ~CGTS_LS_OVERRIDE;
6080                        data &= ~ON_MONITOR_ADD_MASK;
6081                        data |= ON_MONITOR_ADD_EN;
6082                        data |= ON_MONITOR_ADD(0x96);
6083                        if (orig != data)
6084                                WREG32(CGTS_SM_CTRL_REG, data);
6085                }
6086        } else {
6087                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6088                data |= 0x00000003;
6089                if (orig != data)
6090                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6091
6092                data = RREG32(RLC_MEM_SLP_CNTL);
6093                if (data & RLC_MEM_LS_EN) {
6094                        data &= ~RLC_MEM_LS_EN;
6095                        WREG32(RLC_MEM_SLP_CNTL, data);
6096                }
6097
6098                data = RREG32(CP_MEM_SLP_CNTL);
6099                if (data & CP_MEM_LS_EN) {
6100                        data &= ~CP_MEM_LS_EN;
6101                        WREG32(CP_MEM_SLP_CNTL, data);
6102                }
6103
6104                orig = data = RREG32(CGTS_SM_CTRL_REG);
6105                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6106                if (orig != data)
6107                        WREG32(CGTS_SM_CTRL_REG, data);
6108
6109                tmp = cik_halt_rlc(rdev);
6110
6111                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6112                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6113                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6114                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6115                WREG32(RLC_SERDES_WR_CTRL, data);
6116
6117                cik_update_rlc(rdev, tmp);
6118        }
6119}
6120
6121static const u32 mc_cg_registers[] =
6122{
6123        MC_HUB_MISC_HUB_CG,
6124        MC_HUB_MISC_SIP_CG,
6125        MC_HUB_MISC_VM_CG,
6126        MC_XPB_CLK_GAT,
6127        ATC_MISC_CG,
6128        MC_CITF_MISC_WR_CG,
6129        MC_CITF_MISC_RD_CG,
6130        MC_CITF_MISC_VM_CG,
6131        VM_L2_CG,
6132};
6133
6134static void cik_enable_mc_ls(struct radeon_device *rdev,
6135                             bool enable)
6136{
6137        int i;
6138        u32 orig, data;
6139
6140        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6141                orig = data = RREG32(mc_cg_registers[i]);
6142                if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6143                        data |= MC_LS_ENABLE;
6144                else
6145                        data &= ~MC_LS_ENABLE;
6146                if (data != orig)
6147                        WREG32(mc_cg_registers[i], data);
6148        }
6149}
6150
6151static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6152                               bool enable)
6153{
6154        int i;
6155        u32 orig, data;
6156
6157        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6158                orig = data = RREG32(mc_cg_registers[i]);
6159                if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6160                        data |= MC_CG_ENABLE;
6161                else
6162                        data &= ~MC_CG_ENABLE;
6163                if (data != orig)
6164                        WREG32(mc_cg_registers[i], data);
6165        }
6166}
6167
6168static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6169                                 bool enable)
6170{
6171        u32 orig, data;
6172
6173        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6174                WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6175                WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6176        } else {
6177                orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6178                data |= 0xff000000;
6179                if (data != orig)
6180                        WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6181
6182                orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6183                data |= 0xff000000;
6184                if (data != orig)
6185                        WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6186        }
6187}
6188
6189static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6190                                 bool enable)
6191{
6192        u32 orig, data;
6193
6194        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6195                orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6196                data |= 0x100;
6197                if (orig != data)
6198                        WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6199
6200                orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6201                data |= 0x100;
6202                if (orig != data)
6203                        WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6204        } else {
6205                orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6206                data &= ~0x100;
6207                if (orig != data)
6208                        WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6209
6210                orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6211                data &= ~0x100;
6212                if (orig != data)
6213                        WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6214        }
6215}
6216
6217static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6218                                bool enable)
6219{
6220        u32 orig, data;
6221
6222        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6223                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6224                data = 0xfff;
6225                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6226
6227                orig = data = RREG32(UVD_CGC_CTRL);
6228                data |= DCM;
6229                if (orig != data)
6230                        WREG32(UVD_CGC_CTRL, data);
6231        } else {
6232                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6233                data &= ~0xfff;
6234                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6235
6236                orig = data = RREG32(UVD_CGC_CTRL);
6237                data &= ~DCM;
6238                if (orig != data)
6239                        WREG32(UVD_CGC_CTRL, data);
6240        }
6241}
6242
6243static void cik_enable_bif_mgls(struct radeon_device *rdev,
6244                               bool enable)
6245{
6246        u32 orig, data;
6247
6248        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6249
6250        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6251                data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6252                        REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6253        else
6254                data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6255                          REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6256
6257        if (orig != data)
6258                WREG32_PCIE_PORT(PCIE_CNTL2, data);
6259}
6260
6261static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6262                                bool enable)
6263{
6264        u32 orig, data;
6265
6266        orig = data = RREG32(HDP_HOST_PATH_CNTL);
6267
6268        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6269                data &= ~CLOCK_GATING_DIS;
6270        else
6271                data |= CLOCK_GATING_DIS;
6272
6273        if (orig != data)
6274                WREG32(HDP_HOST_PATH_CNTL, data);
6275}
6276
6277static void cik_enable_hdp_ls(struct radeon_device *rdev,
6278                              bool enable)
6279{
6280        u32 orig, data;
6281
6282        orig = data = RREG32(HDP_MEM_POWER_LS);
6283
6284        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6285                data |= HDP_LS_ENABLE;
6286        else
6287                data &= ~HDP_LS_ENABLE;
6288
6289        if (orig != data)
6290                WREG32(HDP_MEM_POWER_LS, data);
6291}
6292
6293void cik_update_cg(struct radeon_device *rdev,
6294                   u32 block, bool enable)
6295{
6296
6297        if (block & RADEON_CG_BLOCK_GFX) {
6298                cik_enable_gui_idle_interrupt(rdev, false);
6299                /* order matters! */
6300                if (enable) {
6301                        cik_enable_mgcg(rdev, true);
6302                        cik_enable_cgcg(rdev, true);
6303                } else {
6304                        cik_enable_cgcg(rdev, false);
6305                        cik_enable_mgcg(rdev, false);
6306                }
6307                cik_enable_gui_idle_interrupt(rdev, true);
6308        }
6309
6310        if (block & RADEON_CG_BLOCK_MC) {
6311                if (!(rdev->flags & RADEON_IS_IGP)) {
6312                        cik_enable_mc_mgcg(rdev, enable);
6313                        cik_enable_mc_ls(rdev, enable);
6314                }
6315        }
6316
6317        if (block & RADEON_CG_BLOCK_SDMA) {
6318                cik_enable_sdma_mgcg(rdev, enable);
6319                cik_enable_sdma_mgls(rdev, enable);
6320        }
6321
6322        if (block & RADEON_CG_BLOCK_BIF) {
6323                cik_enable_bif_mgls(rdev, enable);
6324        }
6325
6326        if (block & RADEON_CG_BLOCK_UVD) {
6327                if (rdev->has_uvd)
6328                        cik_enable_uvd_mgcg(rdev, enable);
6329        }
6330
6331        if (block & RADEON_CG_BLOCK_HDP) {
6332                cik_enable_hdp_mgcg(rdev, enable);
6333                cik_enable_hdp_ls(rdev, enable);
6334        }
6335
6336        if (block & RADEON_CG_BLOCK_VCE) {
6337                vce_v2_0_enable_mgcg(rdev, enable);
6338        }
6339}
6340
6341static void cik_init_cg(struct radeon_device *rdev)
6342{
6343
6344        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6345
6346        if (rdev->has_uvd)
6347                si_init_uvd_internal_cg(rdev);
6348
6349        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6350                             RADEON_CG_BLOCK_SDMA |
6351                             RADEON_CG_BLOCK_BIF |
6352                             RADEON_CG_BLOCK_UVD |
6353                             RADEON_CG_BLOCK_HDP), true);
6354}
6355
6356static void cik_fini_cg(struct radeon_device *rdev)
6357{
6358        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6359                             RADEON_CG_BLOCK_SDMA |
6360                             RADEON_CG_BLOCK_BIF |
6361                             RADEON_CG_BLOCK_UVD |
6362                             RADEON_CG_BLOCK_HDP), false);
6363
6364        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6365}
6366
6367static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6368                                          bool enable)
6369{
6370        u32 data, orig;
6371
6372        orig = data = RREG32(RLC_PG_CNTL);
6373        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6374                data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6375        else
6376                data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6377        if (orig != data)
6378                WREG32(RLC_PG_CNTL, data);
6379}
6380
6381static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6382                                          bool enable)
6383{
6384        u32 data, orig;
6385
6386        orig = data = RREG32(RLC_PG_CNTL);
6387        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6388                data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6389        else
6390                data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6391        if (orig != data)
6392                WREG32(RLC_PG_CNTL, data);
6393}
6394
6395static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6396{
6397        u32 data, orig;
6398
6399        orig = data = RREG32(RLC_PG_CNTL);
6400        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6401                data &= ~DISABLE_CP_PG;
6402        else
6403                data |= DISABLE_CP_PG;
6404        if (orig != data)
6405                WREG32(RLC_PG_CNTL, data);
6406}
6407
6408static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6409{
6410        u32 data, orig;
6411
6412        orig = data = RREG32(RLC_PG_CNTL);
6413        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6414                data &= ~DISABLE_GDS_PG;
6415        else
6416                data |= DISABLE_GDS_PG;
6417        if (orig != data)
6418                WREG32(RLC_PG_CNTL, data);
6419}
6420
6421#define CP_ME_TABLE_SIZE    96
6422#define CP_ME_TABLE_OFFSET  2048
6423#define CP_MEC_TABLE_OFFSET 4096
6424
6425void cik_init_cp_pg_table(struct radeon_device *rdev)
6426{
6427        volatile u32 *dst_ptr;
6428        int me, i, max_me = 4;
6429        u32 bo_offset = 0;
6430        u32 table_offset, table_size;
6431
6432        if (rdev->family == CHIP_KAVERI)
6433                max_me = 5;
6434
6435        if (rdev->rlc.cp_table_ptr == NULL)
6436                return;
6437
6438        /* write the cp table buffer */
6439        dst_ptr = rdev->rlc.cp_table_ptr;
6440        for (me = 0; me < max_me; me++) {
6441                if (rdev->new_fw) {
6442                        const __le32 *fw_data;
6443                        const struct gfx_firmware_header_v1_0 *hdr;
6444
6445                        if (me == 0) {
6446                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6447                                fw_data = (const __le32 *)
6448                                        (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6449                                table_offset = le32_to_cpu(hdr->jt_offset);
6450                                table_size = le32_to_cpu(hdr->jt_size);
6451                        } else if (me == 1) {
6452                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6453                                fw_data = (const __le32 *)
6454                                        (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6455                                table_offset = le32_to_cpu(hdr->jt_offset);
6456                                table_size = le32_to_cpu(hdr->jt_size);
6457                        } else if (me == 2) {
6458                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6459                                fw_data = (const __le32 *)
6460                                        (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6461                                table_offset = le32_to_cpu(hdr->jt_offset);
6462                                table_size = le32_to_cpu(hdr->jt_size);
6463                        } else if (me == 3) {
6464                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6465                                fw_data = (const __le32 *)
6466                                        (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6467                                table_offset = le32_to_cpu(hdr->jt_offset);
6468                                table_size = le32_to_cpu(hdr->jt_size);
6469                        } else {
6470                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6471                                fw_data = (const __le32 *)
6472                                        (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6473                                table_offset = le32_to_cpu(hdr->jt_offset);
6474                                table_size = le32_to_cpu(hdr->jt_size);
6475                        }
6476
6477                        for (i = 0; i < table_size; i ++) {
6478                                dst_ptr[bo_offset + i] =
6479                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6480                        }
6481                        bo_offset += table_size;
6482                } else {
6483                        const __be32 *fw_data;
6484                        table_size = CP_ME_TABLE_SIZE;
6485
6486                        if (me == 0) {
6487                                fw_data = (const __be32 *)rdev->ce_fw->data;
6488                                table_offset = CP_ME_TABLE_OFFSET;
6489                        } else if (me == 1) {
6490                                fw_data = (const __be32 *)rdev->pfp_fw->data;
6491                                table_offset = CP_ME_TABLE_OFFSET;
6492                        } else if (me == 2) {
6493                                fw_data = (const __be32 *)rdev->me_fw->data;
6494                                table_offset = CP_ME_TABLE_OFFSET;
6495                        } else {
6496                                fw_data = (const __be32 *)rdev->mec_fw->data;
6497                                table_offset = CP_MEC_TABLE_OFFSET;
6498                        }
6499
6500                        for (i = 0; i < table_size; i ++) {
6501                                dst_ptr[bo_offset + i] =
6502                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6503                        }
6504                        bo_offset += table_size;
6505                }
6506        }
6507}
6508
6509static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6510                                bool enable)
6511{
6512        u32 data, orig;
6513
6514        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6515                orig = data = RREG32(RLC_PG_CNTL);
6516                data |= GFX_PG_ENABLE;
6517                if (orig != data)
6518                        WREG32(RLC_PG_CNTL, data);
6519
6520                orig = data = RREG32(RLC_AUTO_PG_CTRL);
6521                data |= AUTO_PG_EN;
6522                if (orig != data)
6523                        WREG32(RLC_AUTO_PG_CTRL, data);
6524        } else {
6525                orig = data = RREG32(RLC_PG_CNTL);
6526                data &= ~GFX_PG_ENABLE;
6527                if (orig != data)
6528                        WREG32(RLC_PG_CNTL, data);
6529
6530                orig = data = RREG32(RLC_AUTO_PG_CTRL);
6531                data &= ~AUTO_PG_EN;
6532                if (orig != data)
6533                        WREG32(RLC_AUTO_PG_CTRL, data);
6534
6535                data = RREG32(DB_RENDER_CONTROL);
6536        }
6537}
6538
6539static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6540{
6541        u32 mask = 0, tmp, tmp1;
6542        int i;
6543
6544        cik_select_se_sh(rdev, se, sh);
6545        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6546        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6547        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6548
6549        tmp &= 0xffff0000;
6550
6551        tmp |= tmp1;
6552        tmp >>= 16;
6553
6554        for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6555                mask <<= 1;
6556                mask |= 1;
6557        }
6558
6559        return (~tmp) & mask;
6560}
6561
6562static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6563{
6564        u32 i, j, k, active_cu_number = 0;
6565        u32 mask, counter, cu_bitmap;
6566        u32 tmp = 0;
6567
6568        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6569                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6570                        mask = 1;
6571                        cu_bitmap = 0;
6572                        counter = 0;
6573                        for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6574                                if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6575                                        if (counter < 2)
6576                                                cu_bitmap |= mask;
6577                                        counter ++;
6578                                }
6579                                mask <<= 1;
6580                        }
6581
6582                        active_cu_number += counter;
6583                        tmp |= (cu_bitmap << (i * 16 + j * 8));
6584                }
6585        }
6586
6587        WREG32(RLC_PG_AO_CU_MASK, tmp);
6588
6589        tmp = RREG32(RLC_MAX_PG_CU);
6590        tmp &= ~MAX_PU_CU_MASK;
6591        tmp |= MAX_PU_CU(active_cu_number);
6592        WREG32(RLC_MAX_PG_CU, tmp);
6593}
6594
6595static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6596                                       bool enable)
6597{
6598        u32 data, orig;
6599
6600        orig = data = RREG32(RLC_PG_CNTL);
6601        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6602                data |= STATIC_PER_CU_PG_ENABLE;
6603        else
6604                data &= ~STATIC_PER_CU_PG_ENABLE;
6605        if (orig != data)
6606                WREG32(RLC_PG_CNTL, data);
6607}
6608
6609static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6610                                        bool enable)
6611{
6612        u32 data, orig;
6613
6614        orig = data = RREG32(RLC_PG_CNTL);
6615        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6616                data |= DYN_PER_CU_PG_ENABLE;
6617        else
6618                data &= ~DYN_PER_CU_PG_ENABLE;
6619        if (orig != data)
6620                WREG32(RLC_PG_CNTL, data);
6621}
6622
6623#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6624#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6625
6626static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6627{
6628        u32 data, orig;
6629        u32 i;
6630
6631        if (rdev->rlc.cs_data) {
6632                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6633                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6634                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6635                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6636        } else {
6637                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6638                for (i = 0; i < 3; i++)
6639                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
6640        }
6641        if (rdev->rlc.reg_list) {
6642                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6643                for (i = 0; i < rdev->rlc.reg_list_size; i++)
6644                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6645        }
6646
6647        orig = data = RREG32(RLC_PG_CNTL);
6648        data |= GFX_PG_SRC;
6649        if (orig != data)
6650                WREG32(RLC_PG_CNTL, data);
6651
6652        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6653        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6654
6655        data = RREG32(CP_RB_WPTR_POLL_CNTL);
6656        data &= ~IDLE_POLL_COUNT_MASK;
6657        data |= IDLE_POLL_COUNT(0x60);
6658        WREG32(CP_RB_WPTR_POLL_CNTL, data);
6659
6660        data = 0x10101010;
6661        WREG32(RLC_PG_DELAY, data);
6662
6663        data = RREG32(RLC_PG_DELAY_2);
6664        data &= ~0xff;
6665        data |= 0x3;
6666        WREG32(RLC_PG_DELAY_2, data);
6667
6668        data = RREG32(RLC_AUTO_PG_CTRL);
6669        data &= ~GRBM_REG_SGIT_MASK;
6670        data |= GRBM_REG_SGIT(0x700);
6671        WREG32(RLC_AUTO_PG_CTRL, data);
6672
6673}
6674
6675static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6676{
6677        cik_enable_gfx_cgpg(rdev, enable);
6678        cik_enable_gfx_static_mgpg(rdev, enable);
6679        cik_enable_gfx_dynamic_mgpg(rdev, enable);
6680}
6681
6682u32 cik_get_csb_size(struct radeon_device *rdev)
6683{
6684        u32 count = 0;
6685        const struct cs_section_def *sect = NULL;
6686        const struct cs_extent_def *ext = NULL;
6687
6688        if (rdev->rlc.cs_data == NULL)
6689                return 0;
6690
6691        /* begin clear state */
6692        count += 2;
6693        /* context control state */
6694        count += 3;
6695
6696        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6697                for (ext = sect->section; ext->extent != NULL; ++ext) {
6698                        if (sect->id == SECT_CONTEXT)
6699                                count += 2 + ext->reg_count;
6700                        else
6701                                return 0;
6702                }
6703        }
6704        /* pa_sc_raster_config/pa_sc_raster_config1 */
6705        count += 4;
6706        /* end clear state */
6707        count += 2;
6708        /* clear state */
6709        count += 2;
6710
6711        return count;
6712}
6713
6714void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6715{
6716        u32 count = 0, i;
6717        const struct cs_section_def *sect = NULL;
6718        const struct cs_extent_def *ext = NULL;
6719
6720        if (rdev->rlc.cs_data == NULL)
6721                return;
6722        if (buffer == NULL)
6723                return;
6724
6725        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6726        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6727
6728        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6729        buffer[count++] = cpu_to_le32(0x80000000);
6730        buffer[count++] = cpu_to_le32(0x80000000);
6731
6732        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6733                for (ext = sect->section; ext->extent != NULL; ++ext) {
6734                        if (sect->id == SECT_CONTEXT) {
6735                                buffer[count++] =
6736                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6737                                buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6738                                for (i = 0; i < ext->reg_count; i++)
6739                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
6740                        } else {
6741                                return;
6742                        }
6743                }
6744        }
6745
6746        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6747        buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6748        switch (rdev->family) {
6749        case CHIP_BONAIRE:
6750                buffer[count++] = cpu_to_le32(0x16000012);
6751                buffer[count++] = cpu_to_le32(0x00000000);
6752                break;
6753        case CHIP_KAVERI:
6754                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6755                buffer[count++] = cpu_to_le32(0x00000000);
6756                break;
6757        case CHIP_KABINI:
6758        case CHIP_MULLINS:
6759                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6760                buffer[count++] = cpu_to_le32(0x00000000);
6761                break;
6762        case CHIP_HAWAII:
6763                buffer[count++] = cpu_to_le32(0x3a00161a);
6764                buffer[count++] = cpu_to_le32(0x0000002e);
6765                break;
6766        default:
6767                buffer[count++] = cpu_to_le32(0x00000000);
6768                buffer[count++] = cpu_to_le32(0x00000000);
6769                break;
6770        }
6771
6772        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6773        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6774
6775        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6776        buffer[count++] = cpu_to_le32(0);
6777}
6778
6779static void cik_init_pg(struct radeon_device *rdev)
6780{
6781        if (rdev->pg_flags) {
6782                cik_enable_sck_slowdown_on_pu(rdev, true);
6783                cik_enable_sck_slowdown_on_pd(rdev, true);
6784                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6785                        cik_init_gfx_cgpg(rdev);
6786                        cik_enable_cp_pg(rdev, true);
6787                        cik_enable_gds_pg(rdev, true);
6788                }
6789                cik_init_ao_cu_mask(rdev);
6790                cik_update_gfx_pg(rdev, true);
6791        }
6792}
6793
6794static void cik_fini_pg(struct radeon_device *rdev)
6795{
6796        if (rdev->pg_flags) {
6797                cik_update_gfx_pg(rdev, false);
6798                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6799                        cik_enable_cp_pg(rdev, false);
6800                        cik_enable_gds_pg(rdev, false);
6801                }
6802        }
6803}
6804
6805/*
6806 * Interrupts
6807 * Starting with r6xx, interrupts are handled via a ring buffer.
6808 * Ring buffers are areas of GPU accessible memory that the GPU
6809 * writes interrupt vectors into and the host reads vectors out of.
6810 * There is a rptr (read pointer) that determines where the
6811 * host is currently reading, and a wptr (write pointer)
6812 * which determines where the GPU has written.  When the
6813 * pointers are equal, the ring is idle.  When the GPU
6814 * writes vectors to the ring buffer, it increments the
6815 * wptr.  When there is an interrupt, the host then starts
6816 * fetching commands and processing them until the pointers are
6817 * equal again at which point it updates the rptr.
6818 */
6819
6820/**
6821 * cik_enable_interrupts - Enable the interrupt ring buffer
6822 *
6823 * @rdev: radeon_device pointer
6824 *
6825 * Enable the interrupt ring buffer (CIK).
6826 */
6827static void cik_enable_interrupts(struct radeon_device *rdev)
6828{
6829        u32 ih_cntl = RREG32(IH_CNTL);
6830        u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6831
6832        ih_cntl |= ENABLE_INTR;
6833        ih_rb_cntl |= IH_RB_ENABLE;
6834        WREG32(IH_CNTL, ih_cntl);
6835        WREG32(IH_RB_CNTL, ih_rb_cntl);
6836        rdev->ih.enabled = true;
6837}
6838
6839/**
6840 * cik_disable_interrupts - Disable the interrupt ring buffer
6841 *
6842 * @rdev: radeon_device pointer
6843 *
6844 * Disable the interrupt ring buffer (CIK).
6845 */
6846static void cik_disable_interrupts(struct radeon_device *rdev)
6847{
6848        u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6849        u32 ih_cntl = RREG32(IH_CNTL);
6850
6851        ih_rb_cntl &= ~IH_RB_ENABLE;
6852        ih_cntl &= ~ENABLE_INTR;
6853        WREG32(IH_RB_CNTL, ih_rb_cntl);
6854        WREG32(IH_CNTL, ih_cntl);
6855        /* set rptr, wptr to 0 */
6856        WREG32(IH_RB_RPTR, 0);
6857        WREG32(IH_RB_WPTR, 0);
6858        rdev->ih.enabled = false;
6859        rdev->ih.rptr = 0;
6860}
6861
6862/**
6863 * cik_disable_interrupt_state - Disable all interrupt sources
6864 *
6865 * @rdev: radeon_device pointer
6866 *
6867 * Clear all interrupt enable bits used by the driver (CIK).
6868 */
6869static void cik_disable_interrupt_state(struct radeon_device *rdev)
6870{
6871        u32 tmp;
6872
6873        /* gfx ring */
6874        tmp = RREG32(CP_INT_CNTL_RING0) &
6875                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6876        WREG32(CP_INT_CNTL_RING0, tmp);
6877        /* sdma */
6878        tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6879        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6880        tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6881        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6882        /* compute queues */
6883        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6884        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6885        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6886        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6887        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6888        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6889        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6890        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6891        /* grbm */
6892        WREG32(GRBM_INT_CNTL, 0);
6893        /* SRBM */
6894        WREG32(SRBM_INT_CNTL, 0);
6895        /* vline/vblank, etc. */
6896        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6897        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6898        if (rdev->num_crtc >= 4) {
6899                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6900                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6901        }
6902        if (rdev->num_crtc >= 6) {
6903                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6904                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6905        }
6906        /* pflip */
6907        if (rdev->num_crtc >= 2) {
6908                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6909                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6910        }
6911        if (rdev->num_crtc >= 4) {
6912                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6913                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6914        }
6915        if (rdev->num_crtc >= 6) {
6916                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6917                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6918        }
6919
6920        /* dac hotplug */
6921        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6922
6923        /* digital hotplug */
6924        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6925        WREG32(DC_HPD1_INT_CONTROL, tmp);
6926        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6927        WREG32(DC_HPD2_INT_CONTROL, tmp);
6928        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6929        WREG32(DC_HPD3_INT_CONTROL, tmp);
6930        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6931        WREG32(DC_HPD4_INT_CONTROL, tmp);
6932        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6933        WREG32(DC_HPD5_INT_CONTROL, tmp);
6934        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6935        WREG32(DC_HPD6_INT_CONTROL, tmp);
6936
6937}
6938
6939/**
6940 * cik_irq_init - init and enable the interrupt ring
6941 *
6942 * @rdev: radeon_device pointer
6943 *
6944 * Allocate a ring buffer for the interrupt controller,
6945 * enable the RLC, disable interrupts, enable the IH
6946 * ring buffer and enable it (CIK).
6947 * Called at device load and reume.
6948 * Returns 0 for success, errors for failure.
6949 */
6950static int cik_irq_init(struct radeon_device *rdev)
6951{
6952        int ret = 0;
6953        int rb_bufsz;
6954        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6955
6956        /* allocate ring */
6957        ret = r600_ih_ring_alloc(rdev);
6958        if (ret)
6959                return ret;
6960
6961        /* disable irqs */
6962        cik_disable_interrupts(rdev);
6963
6964        /* init rlc */
6965        ret = cik_rlc_resume(rdev);
6966        if (ret) {
6967                r600_ih_ring_fini(rdev);
6968                return ret;
6969        }
6970
6971        /* setup interrupt control */
6972        /* XXX this should actually be a bus address, not an MC address. same on older asics */
6973        WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6974        interrupt_cntl = RREG32(INTERRUPT_CNTL);
6975        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6976         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6977         */
6978        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6979        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6980        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6981        WREG32(INTERRUPT_CNTL, interrupt_cntl);
6982
6983        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6984        rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6985
6986        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6987                      IH_WPTR_OVERFLOW_CLEAR |
6988                      (rb_bufsz << 1));
6989
6990        if (rdev->wb.enabled)
6991                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6992
6993        /* set the writeback address whether it's enabled or not */
6994        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6995        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6996
6997        WREG32(IH_RB_CNTL, ih_rb_cntl);
6998
6999        /* set rptr, wptr to 0 */
7000        WREG32(IH_RB_RPTR, 0);
7001        WREG32(IH_RB_WPTR, 0);
7002
7003        /* Default settings for IH_CNTL (disabled at first) */
7004        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7005        /* RPTR_REARM only works if msi's are enabled */
7006        if (rdev->msi_enabled)
7007                ih_cntl |= RPTR_REARM;
7008        WREG32(IH_CNTL, ih_cntl);
7009
7010        /* force the active interrupt state to all disabled */
7011        cik_disable_interrupt_state(rdev);
7012
7013        pci_set_master(rdev->pdev);
7014
7015        /* enable irqs */
7016        cik_enable_interrupts(rdev);
7017
7018        return ret;
7019}
7020
7021/**
7022 * cik_irq_set - enable/disable interrupt sources
7023 *
7024 * @rdev: radeon_device pointer
7025 *
7026 * Enable interrupt sources on the GPU (vblanks, hpd,
7027 * etc.) (CIK).
7028 * Returns 0 for success, errors for failure.
7029 */
7030int cik_irq_set(struct radeon_device *rdev)
7031{
7032        u32 cp_int_cntl;
7033        u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7034        u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7035        u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7036        u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7037        u32 grbm_int_cntl = 0;
7038        u32 dma_cntl, dma_cntl1;
7039
7040        if (!rdev->irq.installed) {
7041                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7042                return -EINVAL;
7043        }
7044        /* don't enable anything if the ih is disabled */
7045        if (!rdev->ih.enabled) {
7046                cik_disable_interrupts(rdev);
7047                /* force the active interrupt state to all disabled */
7048                cik_disable_interrupt_state(rdev);
7049                return 0;
7050        }
7051
7052        cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7053                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7054        cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7055
7056        hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057        hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058        hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7059        hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7060        hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7061        hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7062
7063        dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7064        dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7065
7066        cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067        cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068        cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069        cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070        cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7071        cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7072        cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7073        cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7074
7075        /* enable CP interrupts on all rings */
7076        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7077                DRM_DEBUG("cik_irq_set: sw int gfx\n");
7078                cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7079        }
7080        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7081                struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7082                DRM_DEBUG("si_irq_set: sw int cp1\n");
7083                if (ring->me == 1) {
7084                        switch (ring->pipe) {
7085                        case 0:
7086                                cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7087                                break;
7088                        case 1:
7089                                cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7090                                break;
7091                        case 2:
7092                                cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7093                                break;
7094                        case 3:
7095                                cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7096                                break;
7097                        default:
7098                                DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7099                                break;
7100                        }
7101                } else if (ring->me == 2) {
7102                        switch (ring->pipe) {
7103                        case 0:
7104                                cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7105                                break;
7106                        case 1:
7107                                cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7108                                break;
7109                        case 2:
7110                                cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7111                                break;
7112                        case 3:
7113                                cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7114                                break;
7115                        default:
7116                                DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7117                                break;
7118                        }
7119                } else {
7120                        DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7121                }
7122        }
7123        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7124                struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7125                DRM_DEBUG("si_irq_set: sw int cp2\n");
7126                if (ring->me == 1) {
7127                        switch (ring->pipe) {
7128                        case 0:
7129                                cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7130                                break;
7131                        case 1:
7132                                cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7133                                break;
7134                        case 2:
7135                                cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7136                                break;
7137                        case 3:
7138                                cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7139                                break;
7140                        default:
7141                                DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7142                                break;
7143                        }
7144                } else if (ring->me == 2) {
7145                        switch (ring->pipe) {
7146                        case 0:
7147                                cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7148                                break;
7149                        case 1:
7150                                cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7151                                break;
7152                        case 2:
7153                                cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7154                                break;
7155                        case 3:
7156                                cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7157                                break;
7158                        default:
7159                                DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7160                                break;
7161                        }
7162                } else {
7163                        DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7164                }
7165        }
7166
7167        if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7168                DRM_DEBUG("cik_irq_set: sw int dma\n");
7169                dma_cntl |= TRAP_ENABLE;
7170        }
7171
7172        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7173                DRM_DEBUG("cik_irq_set: sw int dma1\n");
7174                dma_cntl1 |= TRAP_ENABLE;
7175        }
7176
7177        if (rdev->irq.crtc_vblank_int[0] ||
7178            atomic_read(&rdev->irq.pflip[0])) {
7179                DRM_DEBUG("cik_irq_set: vblank 0\n");
7180                crtc1 |= VBLANK_INTERRUPT_MASK;
7181        }
7182        if (rdev->irq.crtc_vblank_int[1] ||
7183            atomic_read(&rdev->irq.pflip[1])) {
7184                DRM_DEBUG("cik_irq_set: vblank 1\n");
7185                crtc2 |= VBLANK_INTERRUPT_MASK;
7186        }
7187        if (rdev->irq.crtc_vblank_int[2] ||
7188            atomic_read(&rdev->irq.pflip[2])) {
7189                DRM_DEBUG("cik_irq_set: vblank 2\n");
7190                crtc3 |= VBLANK_INTERRUPT_MASK;
7191        }
7192        if (rdev->irq.crtc_vblank_int[3] ||
7193            atomic_read(&rdev->irq.pflip[3])) {
7194                DRM_DEBUG("cik_irq_set: vblank 3\n");
7195                crtc4 |= VBLANK_INTERRUPT_MASK;
7196        }
7197        if (rdev->irq.crtc_vblank_int[4] ||
7198            atomic_read(&rdev->irq.pflip[4])) {
7199                DRM_DEBUG("cik_irq_set: vblank 4\n");
7200                crtc5 |= VBLANK_INTERRUPT_MASK;
7201        }
7202        if (rdev->irq.crtc_vblank_int[5] ||
7203            atomic_read(&rdev->irq.pflip[5])) {
7204                DRM_DEBUG("cik_irq_set: vblank 5\n");
7205                crtc6 |= VBLANK_INTERRUPT_MASK;
7206        }
7207        if (rdev->irq.hpd[0]) {
7208                DRM_DEBUG("cik_irq_set: hpd 1\n");
7209                hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210        }
7211        if (rdev->irq.hpd[1]) {
7212                DRM_DEBUG("cik_irq_set: hpd 2\n");
7213                hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214        }
7215        if (rdev->irq.hpd[2]) {
7216                DRM_DEBUG("cik_irq_set: hpd 3\n");
7217                hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218        }
7219        if (rdev->irq.hpd[3]) {
7220                DRM_DEBUG("cik_irq_set: hpd 4\n");
7221                hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222        }
7223        if (rdev->irq.hpd[4]) {
7224                DRM_DEBUG("cik_irq_set: hpd 5\n");
7225                hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226        }
7227        if (rdev->irq.hpd[5]) {
7228                DRM_DEBUG("cik_irq_set: hpd 6\n");
7229                hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230        }
7231
7232        WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7233
7234        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7235        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7236
7237        WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7238        WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7239        WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7240        WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7241        WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7242        WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7243        WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7244        WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7245
7246        WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7247
7248        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7249        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7250        if (rdev->num_crtc >= 4) {
7251                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7252                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7253        }
7254        if (rdev->num_crtc >= 6) {
7255                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7256                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7257        }
7258
7259        if (rdev->num_crtc >= 2) {
7260                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7261                       GRPH_PFLIP_INT_MASK);
7262                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7263                       GRPH_PFLIP_INT_MASK);
7264        }
7265        if (rdev->num_crtc >= 4) {
7266                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7267                       GRPH_PFLIP_INT_MASK);
7268                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7269                       GRPH_PFLIP_INT_MASK);
7270        }
7271        if (rdev->num_crtc >= 6) {
7272                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7273                       GRPH_PFLIP_INT_MASK);
7274                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7275                       GRPH_PFLIP_INT_MASK);
7276        }
7277
7278        WREG32(DC_HPD1_INT_CONTROL, hpd1);
7279        WREG32(DC_HPD2_INT_CONTROL, hpd2);
7280        WREG32(DC_HPD3_INT_CONTROL, hpd3);
7281        WREG32(DC_HPD4_INT_CONTROL, hpd4);
7282        WREG32(DC_HPD5_INT_CONTROL, hpd5);
7283        WREG32(DC_HPD6_INT_CONTROL, hpd6);
7284
7285        /* posting read */
7286        RREG32(SRBM_STATUS);
7287
7288        return 0;
7289}
7290
7291/**
7292 * cik_irq_ack - ack interrupt sources
7293 *
7294 * @rdev: radeon_device pointer
7295 *
7296 * Ack interrupt sources on the GPU (vblanks, hpd,
7297 * etc.) (CIK).  Certain interrupts sources are sw
7298 * generated and do not require an explicit ack.
7299 */
7300static inline void cik_irq_ack(struct radeon_device *rdev)
7301{
7302        u32 tmp;
7303
7304        rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7305        rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7306        rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7307        rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7308        rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7309        rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7310        rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7311
7312        rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7313                EVERGREEN_CRTC0_REGISTER_OFFSET);
7314        rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7315                EVERGREEN_CRTC1_REGISTER_OFFSET);
7316        if (rdev->num_crtc >= 4) {
7317                rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7318                        EVERGREEN_CRTC2_REGISTER_OFFSET);
7319                rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7320                        EVERGREEN_CRTC3_REGISTER_OFFSET);
7321        }
7322        if (rdev->num_crtc >= 6) {
7323                rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7324                        EVERGREEN_CRTC4_REGISTER_OFFSET);
7325                rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7326                        EVERGREEN_CRTC5_REGISTER_OFFSET);
7327        }
7328
7329        if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7330                WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7331                       GRPH_PFLIP_INT_CLEAR);
7332        if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7333                WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7334                       GRPH_PFLIP_INT_CLEAR);
7335        if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7336                WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7337        if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7338                WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7339        if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7340                WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7341        if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7342                WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7343
7344        if (rdev->num_crtc >= 4) {
7345                if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7346                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7347                               GRPH_PFLIP_INT_CLEAR);
7348                if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7349                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7350                               GRPH_PFLIP_INT_CLEAR);
7351                if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7352                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7353                if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7354                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7355                if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7356                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7357                if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7358                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7359        }
7360
7361        if (rdev->num_crtc >= 6) {
7362                if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7363                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7364                               GRPH_PFLIP_INT_CLEAR);
7365                if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7366                        WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7367                               GRPH_PFLIP_INT_CLEAR);
7368                if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7369                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7370                if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7371                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7372                if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7373                        WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7374                if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7375                        WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7376        }
7377
7378        if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7379                tmp = RREG32(DC_HPD1_INT_CONTROL);
7380                tmp |= DC_HPDx_INT_ACK;
7381                WREG32(DC_HPD1_INT_CONTROL, tmp);
7382        }
7383        if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7384                tmp = RREG32(DC_HPD2_INT_CONTROL);
7385                tmp |= DC_HPDx_INT_ACK;
7386                WREG32(DC_HPD2_INT_CONTROL, tmp);
7387        }
7388        if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7389                tmp = RREG32(DC_HPD3_INT_CONTROL);
7390                tmp |= DC_HPDx_INT_ACK;
7391                WREG32(DC_HPD3_INT_CONTROL, tmp);
7392        }
7393        if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7394                tmp = RREG32(DC_HPD4_INT_CONTROL);
7395                tmp |= DC_HPDx_INT_ACK;
7396                WREG32(DC_HPD4_INT_CONTROL, tmp);
7397        }
7398        if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7399                tmp = RREG32(DC_HPD5_INT_CONTROL);
7400                tmp |= DC_HPDx_INT_ACK;
7401                WREG32(DC_HPD5_INT_CONTROL, tmp);
7402        }
7403        if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7404                tmp = RREG32(DC_HPD6_INT_CONTROL);
7405                tmp |= DC_HPDx_INT_ACK;
7406                WREG32(DC_HPD6_INT_CONTROL, tmp);
7407        }
7408        if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7409                tmp = RREG32(DC_HPD1_INT_CONTROL);
7410                tmp |= DC_HPDx_RX_INT_ACK;
7411                WREG32(DC_HPD1_INT_CONTROL, tmp);
7412        }
7413        if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7414                tmp = RREG32(DC_HPD2_INT_CONTROL);
7415                tmp |= DC_HPDx_RX_INT_ACK;
7416                WREG32(DC_HPD2_INT_CONTROL, tmp);
7417        }
7418        if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7419                tmp = RREG32(DC_HPD3_INT_CONTROL);
7420                tmp |= DC_HPDx_RX_INT_ACK;
7421                WREG32(DC_HPD3_INT_CONTROL, tmp);
7422        }
7423        if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7424                tmp = RREG32(DC_HPD4_INT_CONTROL);
7425                tmp |= DC_HPDx_RX_INT_ACK;
7426                WREG32(DC_HPD4_INT_CONTROL, tmp);
7427        }
7428        if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7429                tmp = RREG32(DC_HPD5_INT_CONTROL);
7430                tmp |= DC_HPDx_RX_INT_ACK;
7431                WREG32(DC_HPD5_INT_CONTROL, tmp);
7432        }
7433        if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7434                tmp = RREG32(DC_HPD6_INT_CONTROL);
7435                tmp |= DC_HPDx_RX_INT_ACK;
7436                WREG32(DC_HPD6_INT_CONTROL, tmp);
7437        }
7438}
7439
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK): turn the IH ring off, wait,
 * ack whatever the status registers still report and then clear
 * every interrupt source enable.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* give the hw 1 ms before acknowledging what is still flagged */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7455
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7469
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Run the suspend path (interrupts disabled, RLC stopped) and then
 * free the IH ring buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before the ring memory goes away */
	cik_irq_suspend(rdev);

	r600_ih_ring_fini(rdev);
}
7484
7485/**
7486 * cik_get_ih_wptr - fetch the current IH ring write pointer
7487 *
7488 * @rdev: radeon_device pointer
7489 *
7490 * Read the IH ring buffer wptr (CIK) from the writeback buffer when
7491 * writeback is enabled, otherwise from IH_RB_WPTR.  If the overflow
7492 * flag is set, move rptr past the overwritten entries and clear it.
7493 * Called from cik_irq_process().
7494 * Returns the wptr masked with ih.ptr_mask.
7495 */
7496static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7497{
7498        u32 raw_wptr, resume_at, rb_cntl;
7499
7500        /* Prefer the writeback copy when enabled, else read the register. */
7501        raw_wptr = rdev->wb.enabled ?
7502                le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]) :
7503                RREG32(IH_RB_WPTR);
7504
7505        if (!(raw_wptr & RB_OVERFLOW))
7506                return raw_wptr & rdev->ih.ptr_mask;
7507
7508        /* Overflow: resume parsing at the last vector that was not
7509         * overwritten (wptr + 16), warn, then clear the overflow flag.
7510         */
7511        raw_wptr &= ~RB_OVERFLOW;
7512        resume_at = (raw_wptr + 16) & rdev->ih.ptr_mask;
7513        dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7514                 raw_wptr, rdev->ih.rptr, resume_at);
7515        rdev->ih.rptr = resume_at;
7516        rb_cntl = RREG32(IH_RB_CNTL) | IH_WPTR_OVERFLOW_CLEAR;
7517        WREG32(IH_RB_CNTL, rb_cntl);
7518        return raw_wptr & rdev->ih.ptr_mask;
7519}
7520
7521/*        CIK IV Ring
7522 * Each IV ring entry is 128 bits:
7523 * [7:0]    - interrupt source id
7524 * [31:8]   - reserved
7525 * [59:32]  - interrupt source data
7526 * [63:60]  - reserved
7527 * [71:64]  - RINGID
7528 *            CP:
7529 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7530 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7531 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7532 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7533 *            PIPE_ID - ME0 0=3D
7534 *                    - ME1&2 compute dispatcher (4 pipes each)
7535 *            SDMA:
7536 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7537 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7538 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7539 * [79:72]  - VMID
7540 * [95:80]  - PASID
7541 * [127:96] - reserved
7542 */
7543/**
7544 * cik_irq_process - interrupt handler
7545 *
7546 * @rdev: radeon_device pointer
7547 *
7548 * Interrupt handler (CIK).  Walk the IH ring,
7549 * ack interrupts and schedule work to handle
7550 * interrupt events.
7551 * Returns IRQ_NONE if the IH is disabled, the device is shutting down
     * or the IH lock is already held; IRQ_HANDLED otherwise.
7552 */
7553int cik_irq_process(struct radeon_device *rdev)
7554{
7555        struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7556        struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7557        u32 wptr;
7558        u32 rptr;
7559        u32 src_id, src_data, ring_id;
7560        u8 me_id, pipe_id, queue_id;
7561        u32 ring_index;
7562        bool queue_hotplug = false;
7563        bool queue_dp = false;
7564        bool queue_reset = false;
7565        u32 addr, status, mc_client;
7566        bool queue_thermal = false;
7567
7568        if (!rdev->ih.enabled || rdev->shutdown)
7569                return IRQ_NONE;
7570
7571        wptr = cik_get_ih_wptr(rdev);
7572
7573restart_ih:
7574        /* is somebody else already processing irqs? */
7575        if (atomic_xchg(&rdev->ih.lock, 1))
7576                return IRQ_NONE;
7577
7578        rptr = rdev->ih.rptr;
7579        DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7580
7581        /* Order reading of wptr vs. reading of IH ring data */
7582        rmb();
7583
7584        /* display interrupts */
7585        cik_irq_ack(rdev);
7586
7587        while (rptr != wptr) {
7588                /* wptr/rptr are in bytes! */
7589                ring_index = rptr / 4;
7590
                    /* Fields used from the 128-bit IV entry: dword 0 [7:0] is
                     * the source id, dword 1 [27:0] the source data and
                     * dword 2 [7:0] the ring id.
                     */
7591                src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7592                src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7593                ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7594
7595                switch (src_id) {
7596                case 1: /* D1 vblank/vline */
7597                        switch (src_data) {
7598                        case 0: /* D1 vblank */
7599                                if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7600                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602                                if (rdev->irq.crtc_vblank_int[0]) {
7603                                        drm_handle_vblank(rdev->ddev, 0);
7604                                        rdev->pm.vblank_sync = true;
7605                                        wake_up(&rdev->irq.vblank_queue);
7606                                }
7607                                if (atomic_read(&rdev->irq.pflip[0]))
7608                                        radeon_crtc_handle_vblank(rdev, 0);
7609                                rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7610                                DRM_DEBUG("IH: D1 vblank\n");
7611
7612                                break;
7613                        case 1: /* D1 vline */
7614                                if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7615                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617                                rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7618                                DRM_DEBUG("IH: D1 vline\n");
7619
7620                                break;
7621                        default:
7622                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7623                                break;
7624                        }
7625                        break;
7626                case 2: /* D2 vblank/vline */
7627                        switch (src_data) {
7628                        case 0: /* D2 vblank */
7629                                if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7630                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632                                if (rdev->irq.crtc_vblank_int[1]) {
7633                                        drm_handle_vblank(rdev->ddev, 1);
7634                                        rdev->pm.vblank_sync = true;
7635                                        wake_up(&rdev->irq.vblank_queue);
7636                                }
7637                                if (atomic_read(&rdev->irq.pflip[1]))
7638                                        radeon_crtc_handle_vblank(rdev, 1);
7639                                rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7640                                DRM_DEBUG("IH: D2 vblank\n");
7641
7642                                break;
7643                        case 1: /* D2 vline */
7644                                if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7645                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647                                rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7648                                DRM_DEBUG("IH: D2 vline\n");
7649
7650                                break;
7651                        default:
7652                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653                                break;
7654                        }
7655                        break;
7656                case 3: /* D3 vblank/vline */
7657                        switch (src_data) {
7658                        case 0: /* D3 vblank */
7659                                if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7660                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662                                if (rdev->irq.crtc_vblank_int[2]) {
7663                                        drm_handle_vblank(rdev->ddev, 2);
7664                                        rdev->pm.vblank_sync = true;
7665                                        wake_up(&rdev->irq.vblank_queue);
7666                                }
7667                                if (atomic_read(&rdev->irq.pflip[2]))
7668                                        radeon_crtc_handle_vblank(rdev, 2);
7669                                rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7670                                DRM_DEBUG("IH: D3 vblank\n");
7671
7672                                break;
7673                        case 1: /* D3 vline */
7674                                if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7675                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677                                rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7678                                DRM_DEBUG("IH: D3 vline\n");
7679
7680                                break;
7681                        default:
7682                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683                                break;
7684                        }
7685                        break;
7686                case 4: /* D4 vblank/vline */
7687                        switch (src_data) {
7688                        case 0: /* D4 vblank */
7689                                if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7690                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692                                if (rdev->irq.crtc_vblank_int[3]) {
7693                                        drm_handle_vblank(rdev->ddev, 3);
7694                                        rdev->pm.vblank_sync = true;
7695                                        wake_up(&rdev->irq.vblank_queue);
7696                                }
7697                                if (atomic_read(&rdev->irq.pflip[3]))
7698                                        radeon_crtc_handle_vblank(rdev, 3);
7699                                rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7700                                DRM_DEBUG("IH: D4 vblank\n");
7701
7702                                break;
7703                        case 1: /* D4 vline */
7704                                if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7705                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707                                rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7708                                DRM_DEBUG("IH: D4 vline\n");
7709
7710                                break;
7711                        default:
7712                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7713                                break;
7714                        }
7715                        break;
7716                case 5: /* D5 vblank/vline */
7717                        switch (src_data) {
7718                        case 0: /* D5 vblank */
7719                                if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7720                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722                                if (rdev->irq.crtc_vblank_int[4]) {
7723                                        drm_handle_vblank(rdev->ddev, 4);
7724                                        rdev->pm.vblank_sync = true;
7725                                        wake_up(&rdev->irq.vblank_queue);
7726                                }
7727                                if (atomic_read(&rdev->irq.pflip[4]))
7728                                        radeon_crtc_handle_vblank(rdev, 4);
7729                                rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7730                                DRM_DEBUG("IH: D5 vblank\n");
7731
7732                                break;
7733                        case 1: /* D5 vline */
7734                                if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7735                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737                                rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7738                                DRM_DEBUG("IH: D5 vline\n");
7739
7740                                break;
7741                        default:
7742                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743                                break;
7744                        }
7745                        break;
7746                case 6: /* D6 vblank/vline */
7747                        switch (src_data) {
7748                        case 0: /* D6 vblank */
7749                                if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7750                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752                                if (rdev->irq.crtc_vblank_int[5]) {
7753                                        drm_handle_vblank(rdev->ddev, 5);
7754                                        rdev->pm.vblank_sync = true;
7755                                        wake_up(&rdev->irq.vblank_queue);
7756                                }
7757                                if (atomic_read(&rdev->irq.pflip[5]))
7758                                        radeon_crtc_handle_vblank(rdev, 5);
7759                                rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7760                                DRM_DEBUG("IH: D6 vblank\n");
7761
7762                                break;
7763                        case 1: /* D6 vline */
7764                                if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7765                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766
7767                                rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7768                                DRM_DEBUG("IH: D6 vline\n");
7769
7770                                break;
7771                        default:
7772                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7773                                break;
7774                        }
7775                        break;
7776                case 8: /* D1 page flip */
7777                case 10: /* D2 page flip */
7778                case 12: /* D3 page flip */
7779                case 14: /* D4 page flip */
7780                case 16: /* D5 page flip */
7781                case 18: /* D6 page flip */
                            /* (src_id - 8) >> 1 maps ids 8,10,..,18 to crtc 0..5 */
7782                        DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7783                        if (radeon_use_pflipirq > 0)
7784                                radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7785                        break;
7786                case 42: /* HPD hotplug */
                            /* src_data 0-5: HPD1-6 hotplug (queues hotplug work),
                             * src_data 6-11: HPD1-6 RX (queues DP work).
                             */
7787                        switch (src_data) {
7788                        case 0:
7789                                if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7790                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791
7792                                rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7793                                queue_hotplug = true;
7794                                DRM_DEBUG("IH: HPD1\n");
7795
7796                                break;
7797                        case 1:
7798                                if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7799                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800
7801                                rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7802                                queue_hotplug = true;
7803                                DRM_DEBUG("IH: HPD2\n");
7804
7805                                break;
7806                        case 2:
7807                                if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7808                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809
7810                                rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7811                                queue_hotplug = true;
7812                                DRM_DEBUG("IH: HPD3\n");
7813
7814                                break;
7815                        case 3:
7816                                if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7817                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818
7819                                rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7820                                queue_hotplug = true;
7821                                DRM_DEBUG("IH: HPD4\n");
7822
7823                                break;
7824                        case 4:
7825                                if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7826                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827
7828                                rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7829                                queue_hotplug = true;
7830                                DRM_DEBUG("IH: HPD5\n");
7831
7832                                break;
7833                        case 5:
7834                                if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7835                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836
7837                                rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7838                                queue_hotplug = true;
7839                                DRM_DEBUG("IH: HPD6\n");
7840
7841                                break;
7842                        case 6:
7843                                if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7844                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845
7846                                rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7847                                queue_dp = true;
7848                                DRM_DEBUG("IH: HPD_RX 1\n");
7849
7850                                break;
7851                        case 7:
7852                                if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7853                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854
7855                                rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7856                                queue_dp = true;
7857                                DRM_DEBUG("IH: HPD_RX 2\n");
7858
7859                                break;
7860                        case 8:
7861                                if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7862                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863
7864                                rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7865                                queue_dp = true;
7866                                DRM_DEBUG("IH: HPD_RX 3\n");
7867
7868                                break;
7869                        case 9:
7870                                if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7871                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872
7873                                rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7874                                queue_dp = true;
7875                                DRM_DEBUG("IH: HPD_RX 4\n");
7876
7877                                break;
7878                        case 10:
7879                                if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7880                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881
7882                                rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7883                                queue_dp = true;
7884                                DRM_DEBUG("IH: HPD_RX 5\n");
7885
7886                                break;
7887                        case 11:
7888                                if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7889                                        DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890
7891                                rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7892                                queue_dp = true;
7893                                DRM_DEBUG("IH: HPD_RX 6\n");
7894
7895                                break;
7896                        default:
7897                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7898                                break;
7899                        }
7900                        break;
7901                case 96:
                            /* Log the SRBM read error register and ack the interrupt */
7902                        DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7903                        WREG32(SRBM_INT_ACK, 0x1);
7904                        break;
7905                case 124: /* UVD */
7906                        DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7907                        radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7908                        break;
7909                case 146:
7910                case 147:
                            /* Snapshot the VM context1 protection fault registers
                             * before they are reset below.
                             */
7911                        addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7912                        status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7913                        mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7914                        /* reset addr and status */
7915                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
                            /* Nothing latched: skip the fault report */
7916                        if (addr == 0x0 && status == 0x0)
7917                                break;
7918                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7919                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7920                                addr);
7921                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7922                                status);
7923                        cik_vm_decode_fault(rdev, status, addr, mc_client);
7924                        break;
7925                case 167: /* VCE */
7926                        DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7927                        switch (src_data) {
7928                        case 0:
7929                                radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7930                                break;
7931                        case 1:
7932                                radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7933                                break;
7934                        default:
7935                                DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7936                                break;
7937                        }
7938                        break;
7939                case 176: /* GFX RB CP_INT */
7940                case 177: /* GFX IB CP_INT */
7941                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7942                        break;
7943                case 181: /* CP EOP event */
7944                        DRM_DEBUG("IH: CP EOP\n");
7945                        /* XXX check the bitfield order! */
7946                        me_id = (ring_id & 0x60) >> 5;
7947                        pipe_id = (ring_id & 0x18) >> 3;
7948                        queue_id = (ring_id & 0x7) >> 0;
7949                        switch (me_id) {
7950                        case 0:
7951                                radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7952                                break;
7953                        case 1:
7954                        case 2:
                                    /* Only process fences for a compute ring whose
                                     * ME and pipe both match this event.
                                     */
7955                                if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7956                                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7957                                if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7958                                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7959                                break;
7960                        }
7961                        break;
7962                case 184: /* CP Privileged reg access */
7963                        DRM_ERROR("Illegal register access in command stream\n");
7964                        /* XXX check the bitfield order! */
7965                        me_id = (ring_id & 0x60) >> 5;
7966                        pipe_id = (ring_id & 0x18) >> 3;
7967                        queue_id = (ring_id & 0x7) >> 0;
7968                        switch (me_id) {
7969                        case 0:
7970                                /* This results in a full GPU reset, but all we need to do is soft
7971                                 * reset the CP for gfx
7972                                 */
7973                                queue_reset = true;
7974                                break;
7975                        case 1:
7976                                /* XXX compute */
7977                                queue_reset = true;
7978                                break;
7979                        case 2:
7980                                /* XXX compute */
7981                                queue_reset = true;
7982                                break;
7983                        }
7984                        break;
7985                case 185: /* CP Privileged inst */
7986                        DRM_ERROR("Illegal instruction in command stream\n");
7987                        /* XXX check the bitfield order! */
7988                        me_id = (ring_id & 0x60) >> 5;
7989                        pipe_id = (ring_id & 0x18) >> 3;
7990                        queue_id = (ring_id & 0x7) >> 0;
7991                        switch (me_id) {
7992                        case 0:
7993                                /* This results in a full GPU reset, but all we need to do is soft
7994                                 * reset the CP for gfx
7995                                 */
7996                                queue_reset = true;
7997                                break;
7998                        case 1:
7999                                /* XXX compute */
8000                                queue_reset = true;
8001                                break;
8002                        case 2:
8003                                /* XXX compute */
8004                                queue_reset = true;
8005                                break;
8006                        }
8007                        break;
8008                case 224: /* SDMA trap event */
8009                        /* XXX check the bitfield order! */
                            /* For SDMA, me_id holds the instance (ring_id[1:0]) */
8010                        me_id = (ring_id & 0x3) >> 0;
8011                        queue_id = (ring_id & 0xc) >> 2;
8012                        DRM_DEBUG("IH: SDMA trap\n");
8013                        switch (me_id) {
8014                        case 0:
8015                                switch (queue_id) {
8016                                case 0:
8017                                        radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8018                                        break;
8019                                case 1:
8020                                        /* XXX compute */
8021                                        break;
8022                                case 2:
8023                                        /* XXX compute */
8024                                        break;
8025                                }
8026                                break;
8027                        case 1:
8028                                switch (queue_id) {
8029                                case 0:
8030                                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8031                                        break;
8032                                case 1:
8033                                        /* XXX compute */
8034                                        break;
8035                                case 2:
8036                                        /* XXX compute */
8037                                        break;
8038                                }
8039                                break;
8040                        }
8041                        break;
8042                case 230: /* thermal low to high */
8043                        DRM_DEBUG("IH: thermal low to high\n");
8044                        rdev->pm.dpm.thermal.high_to_low = false;
8045                        queue_thermal = true;
8046                        break;
8047                case 231: /* thermal high to low */
8048                        DRM_DEBUG("IH: thermal high to low\n");
8049                        rdev->pm.dpm.thermal.high_to_low = true;
8050                        queue_thermal = true;
8051                        break;
8052                case 233: /* GUI IDLE */
8053                        DRM_DEBUG("IH: GUI idle\n");
8054                        break;
8055                case 241: /* SDMA Privileged inst */
8056                case 247: /* SDMA Privileged inst */
8057                        DRM_ERROR("Illegal instruction in SDMA command stream\n");
8058                        /* XXX check the bitfield order! */
8059                        me_id = (ring_id & 0x3) >> 0;
8060                        queue_id = (ring_id & 0xc) >> 2;
8061                        switch (me_id) {
8062                        case 0:
8063                                switch (queue_id) {
8064                                case 0:
8065                                        queue_reset = true;
8066                                        break;
8067                                case 1:
8068                                        /* XXX compute */
8069                                        queue_reset = true;
8070                                        break;
8071                                case 2:
8072                                        /* XXX compute */
8073                                        queue_reset = true;
8074                                        break;
8075                                }
8076                                break;
8077                        case 1:
8078                                switch (queue_id) {
8079                                case 0:
8080                                        queue_reset = true;
8081                                        break;
8082                                case 1:
8083                                        /* XXX compute */
8084                                        queue_reset = true;
8085                                        break;
8086                                case 2:
8087                                        /* XXX compute */
8088                                        queue_reset = true;
8089                                        break;
8090                                }
8091                                break;
8092                        }
8093                        break;
8094                default:
8095                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8096                        break;
8097                }
8098
8099                /* wptr/rptr are in bytes! */
8100                rptr += 16;
8101                rptr &= rdev->ih.ptr_mask;
8102                WREG32(IH_RB_RPTR, rptr);
8103        }
            /* Deferred work is kicked only after the whole ring has been walked */
8104        if (queue_dp)
8105                schedule_work(&rdev->dp_work);
8106        if (queue_hotplug)
8107                schedule_delayed_work(&rdev->hotplug_work, 0);
8108        if (queue_reset) {
8109                rdev->needs_reset = true;
8110                wake_up_all(&rdev->fence_queue);
8111        }
8112        if (queue_thermal)
8113                schedule_work(&rdev->pm.dpm.thermal.work);
            /* Record the consumed position, then release the IH lock */
8114        rdev->ih.rptr = rptr;
8115        atomic_set(&rdev->ih.lock, 0);
8116
8117        /* make sure wptr hasn't changed while processing */
8118        wptr = cik_get_ih_wptr(rdev);
8119        if (wptr != rptr)
8120                goto restart_ih;
8121
8122        return IRQ_HANDLED;
8123}
8124
8125/*
8126 * startup/shutdown callbacks
8127 */
8128static void cik_uvd_init(struct radeon_device *rdev)
8129{
8130        int r;
8131
8132        if (!rdev->has_uvd)
8133                return;
8134
8135        r = radeon_uvd_init(rdev);
8136        if (r) {
8137                dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8138                /*
8139                 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8140                 * to early fails cik_uvd_start() and thus nothing happens
8141                 * there. So it is pointless to try to go through that code
8142                 * hence why we disable uvd here.
8143                 */
8144                rdev->has_uvd = 0;
8145                return;
8146        }
8147        rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8148        r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8149}
8150
8151static void cik_uvd_start(struct radeon_device *rdev)
8152{
8153        int r;
8154
8155        if (!rdev->has_uvd)
8156                return;
8157
8158        r = radeon_uvd_resume(rdev);
8159        if (r) {
8160                dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8161                goto error;
8162        }
8163        r = uvd_v4_2_resume(rdev);
8164        if (r) {
8165                dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8166                goto error;
8167        }
8168        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8169        if (r) {
8170                dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8171                goto error;
8172        }
8173        return;
8174
8175error:
8176        rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8177}
8178
8179static void cik_uvd_resume(struct radeon_device *rdev)
8180{
8181        struct radeon_ring *ring;
8182        int r;
8183
8184        if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8185                return;
8186
8187        ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8188        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8189        if (r) {
8190                dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8191                return;
8192        }
8193        r = uvd_v1_0_init(rdev);
8194        if (r) {
8195                dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8196                return;
8197        }
8198}
8199
8200static void cik_vce_init(struct radeon_device *rdev)
8201{
8202        int r;
8203
8204        if (!rdev->has_vce)
8205                return;
8206
8207        r = radeon_vce_init(rdev);
8208        if (r) {
8209                dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8210                /*
8211                 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8212                 * to early fails cik_vce_start() and thus nothing happens
8213                 * there. So it is pointless to try to go through that code
8214                 * hence why we disable vce here.
8215                 */
8216                rdev->has_vce = 0;
8217                return;
8218        }
8219        rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8220        r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8221        rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8222        r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8223}
8224
8225static void cik_vce_start(struct radeon_device *rdev)
8226{
8227        int r;
8228
8229        if (!rdev->has_vce)
8230                return;
8231
8232        r = radeon_vce_resume(rdev);
8233        if (r) {
8234                dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8235                goto error;
8236        }
8237        r = vce_v2_0_resume(rdev);
8238        if (r) {
8239                dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8240                goto error;
8241        }
8242        r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8243        if (r) {
8244                dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8245                goto error;
8246        }
8247        r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8248        if (r) {
8249                dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8250                goto error;
8251        }
8252        return;
8253
8254error:
8255        rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8256        rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8257}
8258
8259static void cik_vce_resume(struct radeon_device *rdev)
8260{
8261        struct radeon_ring *ring;
8262        int r;
8263
8264        if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8265                return;
8266
8267        ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8268        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8269        if (r) {
8270                dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8271                return;
8272        }
8273        ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8274        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8275        if (r) {
8276                dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8277                return;
8278        }
8279        r = vce_v1_0_init(rdev);
8280        if (r) {
8281                dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8282                return;
8283        }
8284}
8285
8286/**
8287 * cik_startup - program the asic to a functional state
8288 *
8289 * @rdev: radeon_device pointer
8290 *
8291 * Programs the asic to a functional state (CIK).
8292 * Called by cik_init() and cik_resume().
8293 * Returns 0 for success, error for failure.
8294 */
8295static int cik_startup(struct radeon_device *rdev)
8296{
8297        struct radeon_ring *ring;
8298        u32 nop;
8299        int r;
8300
8301        /* enable pcie gen2/3 link */
8302        cik_pcie_gen3_enable(rdev);
8303        /* enable aspm */
8304        cik_program_aspm(rdev);
8305
8306        /* scratch needs to be initialized before MC */
8307        r = r600_vram_scratch_init(rdev);
8308        if (r)
8309                return r;
8310
8311        cik_mc_program(rdev);
8312
8313        if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8314                r = ci_mc_load_microcode(rdev);
8315                if (r) {
8316                        DRM_ERROR("Failed to load MC firmware!\n");
8317                        return r;
8318                }
8319        }
8320
8321        r = cik_pcie_gart_enable(rdev);
8322        if (r)
8323                return r;
8324        cik_gpu_init(rdev);
8325
8326        /* allocate rlc buffers */
8327        if (rdev->flags & RADEON_IS_IGP) {
8328                if (rdev->family == CHIP_KAVERI) {
8329                        rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8330                        rdev->rlc.reg_list_size =
8331                                (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8332                } else {
8333                        rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8334                        rdev->rlc.reg_list_size =
8335                                (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8336                }
8337        }
8338        rdev->rlc.cs_data = ci_cs_data;
8339        rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8340        rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8341        r = sumo_rlc_init(rdev);
8342        if (r) {
8343                DRM_ERROR("Failed to init rlc BOs!\n");
8344                return r;
8345        }
8346
8347        /* allocate wb buffer */
8348        r = radeon_wb_init(rdev);
8349        if (r)
8350                return r;
8351
8352        /* allocate mec buffers */
8353        r = cik_mec_init(rdev);
8354        if (r) {
8355                DRM_ERROR("Failed to init MEC BOs!\n");
8356                return r;
8357        }
8358
8359        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8360        if (r) {
8361                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8362                return r;
8363        }
8364
8365        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8366        if (r) {
8367                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8368                return r;
8369        }
8370
8371        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8372        if (r) {
8373                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8374                return r;
8375        }
8376
8377        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8378        if (r) {
8379                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8380                return r;
8381        }
8382
8383        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8384        if (r) {
8385                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8386                return r;
8387        }
8388
8389        cik_uvd_start(rdev);
8390        cik_vce_start(rdev);
8391
8392        /* Enable IRQ */
8393        if (!rdev->irq.installed) {
8394                r = radeon_irq_kms_init(rdev);
8395                if (r)
8396                        return r;
8397        }
8398
8399        r = cik_irq_init(rdev);
8400        if (r) {
8401                DRM_ERROR("radeon: IH init failed (%d).\n", r);
8402                radeon_irq_kms_fini(rdev);
8403                return r;
8404        }
8405        cik_irq_set(rdev);
8406
8407        if (rdev->family == CHIP_HAWAII) {
8408                if (rdev->new_fw)
8409                        nop = PACKET3(PACKET3_NOP, 0x3FFF);
8410                else
8411                        nop = RADEON_CP_PACKET2;
8412        } else {
8413                nop = PACKET3(PACKET3_NOP, 0x3FFF);
8414        }
8415
8416        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8417        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8418                             nop);
8419        if (r)
8420                return r;
8421
8422        /* set up the compute queues */
8423        /* type-2 packets are deprecated on MEC, use type-3 instead */
8424        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8425        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8426                             nop);
8427        if (r)
8428                return r;
8429        ring->me = 1; /* first MEC */
8430        ring->pipe = 0; /* first pipe */
8431        ring->queue = 0; /* first queue */
8432        ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8433
8434        /* type-2 packets are deprecated on MEC, use type-3 instead */
8435        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8436        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8437                             nop);
8438        if (r)
8439                return r;
8440        /* dGPU only have 1 MEC */
8441        ring->me = 1; /* first MEC */
8442        ring->pipe = 0; /* first pipe */
8443        ring->queue = 1; /* second queue */
8444        ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8445
8446        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8447        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8448                             SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8449        if (r)
8450                return r;
8451
8452        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8453        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8454                             SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8455        if (r)
8456                return r;
8457
8458        r = cik_cp_resume(rdev);
8459        if (r)
8460                return r;
8461
8462        r = cik_sdma_resume(rdev);
8463        if (r)
8464                return r;
8465
8466        cik_uvd_resume(rdev);
8467        cik_vce_resume(rdev);
8468
8469        r = radeon_ib_pool_init(rdev);
8470        if (r) {
8471                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8472                return r;
8473        }
8474
8475        r = radeon_vm_manager_init(rdev);
8476        if (r) {
8477                dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8478                return r;
8479        }
8480
8481        r = radeon_audio_init(rdev);
8482        if (r)
8483                return r;
8484
8485        return 0;
8486}
8487
8488/**
8489 * cik_resume - resume the asic to a functional state
8490 *
8491 * @rdev: radeon_device pointer
8492 *
8493 * Programs the asic to a functional state (CIK).
8494 * Called at resume.
8495 * Returns 0 for success, error for failure.
8496 */
8497int cik_resume(struct radeon_device *rdev)
8498{
8499        int r;
8500
8501        /* post card */
8502        atom_asic_init(rdev->mode_info.atom_context);
8503
8504        /* init golden registers */
8505        cik_init_golden_registers(rdev);
8506
8507        if (rdev->pm.pm_method == PM_METHOD_DPM)
8508                radeon_pm_resume(rdev);
8509
8510        rdev->accel_working = true;
8511        r = cik_startup(rdev);
8512        if (r) {
8513                DRM_ERROR("cik startup failed on resume\n");
8514                rdev->accel_working = false;
8515                return r;
8516        }
8517
8518        return r;
8519
8520}
8521
8522/**
8523 * cik_suspend - suspend the asic
8524 *
8525 * @rdev: radeon_device pointer
8526 *
8527 * Bring the chip into a state suitable for suspend (CIK).
8528 * Called at suspend.
8529 * Returns 0 for success.
8530 */
8531int cik_suspend(struct radeon_device *rdev)
8532{
8533        radeon_pm_suspend(rdev);
8534        radeon_audio_fini(rdev);
8535        radeon_vm_manager_fini(rdev);
8536        cik_cp_enable(rdev, false);
8537        cik_sdma_enable(rdev, false);
8538        if (rdev->has_uvd) {
8539                uvd_v1_0_fini(rdev);
8540                radeon_uvd_suspend(rdev);
8541        }
8542        if (rdev->has_vce)
8543                radeon_vce_suspend(rdev);
8544        cik_fini_pg(rdev);
8545        cik_fini_cg(rdev);
8546        cik_irq_suspend(rdev);
8547        radeon_wb_disable(rdev);
8548        cik_pcie_gart_disable(rdev);
8549        return 0;
8550}
8551
8552/* Plan is to move initialization in that function and use
8553 * helper function so that radeon_device_init pretty much
8554 * do nothing more than calling asic specific function. This
8555 * should also allow to remove a bunch of callback function
8556 * like vram_info.
8557 */
8558/**
8559 * cik_init - asic specific driver and hw init
8560 *
8561 * @rdev: radeon_device pointer
8562 *
8563 * Setup asic specific driver variables and program the hw
8564 * to a functional state (CIK).
8565 * Called at driver startup.
8566 * Returns 0 for success, errors for failure.
8567 */
8568int cik_init(struct radeon_device *rdev)
8569{
8570        struct radeon_ring *ring;
8571        int r;
8572
8573        /* Read BIOS */
8574        if (!radeon_get_bios(rdev)) {
8575                if (ASIC_IS_AVIVO(rdev))
8576                        return -EINVAL;
8577        }
8578        /* Must be an ATOMBIOS */
8579        if (!rdev->is_atom_bios) {
8580                dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8581                return -EINVAL;
8582        }
8583        r = radeon_atombios_init(rdev);
8584        if (r)
8585                return r;
8586
8587        /* Post card if necessary */
8588        if (!radeon_card_posted(rdev)) {
8589                if (!rdev->bios) {
8590                        dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8591                        return -EINVAL;
8592                }
8593                DRM_INFO("GPU not posted. posting now...\n");
8594                atom_asic_init(rdev->mode_info.atom_context);
8595        }
8596        /* init golden registers */
8597        cik_init_golden_registers(rdev);
8598        /* Initialize scratch registers */
8599        cik_scratch_init(rdev);
8600        /* Initialize surface registers */
8601        radeon_surface_init(rdev);
8602        /* Initialize clocks */
8603        radeon_get_clock_info(rdev->ddev);
8604
8605        /* Fence driver */
8606        r = radeon_fence_driver_init(rdev);
8607        if (r)
8608                return r;
8609
8610        /* initialize memory controller */
8611        r = cik_mc_init(rdev);
8612        if (r)
8613                return r;
8614        /* Memory manager */
8615        r = radeon_bo_init(rdev);
8616        if (r)
8617                return r;
8618
8619        if (rdev->flags & RADEON_IS_IGP) {
8620                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8621                    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8622                        r = cik_init_microcode(rdev);
8623                        if (r) {
8624                                DRM_ERROR("Failed to load firmware!\n");
8625                                return r;
8626                        }
8627                }
8628        } else {
8629                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8630                    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8631                    !rdev->mc_fw) {
8632                        r = cik_init_microcode(rdev);
8633                        if (r) {
8634                                DRM_ERROR("Failed to load firmware!\n");
8635                                return r;
8636                        }
8637                }
8638        }
8639
8640        /* Initialize power management */
8641        radeon_pm_init(rdev);
8642
8643        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8644        ring->ring_obj = NULL;
8645        r600_ring_init(rdev, ring, 1024 * 1024);
8646
8647        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8648        ring->ring_obj = NULL;
8649        r600_ring_init(rdev, ring, 1024 * 1024);
8650        r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8651        if (r)
8652                return r;
8653
8654        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8655        ring->ring_obj = NULL;
8656        r600_ring_init(rdev, ring, 1024 * 1024);
8657        r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8658        if (r)
8659                return r;
8660
8661        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8662        ring->ring_obj = NULL;
8663        r600_ring_init(rdev, ring, 256 * 1024);
8664
8665        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8666        ring->ring_obj = NULL;
8667        r600_ring_init(rdev, ring, 256 * 1024);
8668
8669        cik_uvd_init(rdev);
8670        cik_vce_init(rdev);
8671
8672        rdev->ih.ring_obj = NULL;
8673        r600_ih_ring_init(rdev, 64 * 1024);
8674
8675        r = r600_pcie_gart_init(rdev);
8676        if (r)
8677                return r;
8678
8679        rdev->accel_working = true;
8680        r = cik_startup(rdev);
8681        if (r) {
8682                dev_err(rdev->dev, "disabling GPU acceleration\n");
8683                cik_cp_fini(rdev);
8684                cik_sdma_fini(rdev);
8685                cik_irq_fini(rdev);
8686                sumo_rlc_fini(rdev);
8687                cik_mec_fini(rdev);
8688                radeon_wb_fini(rdev);
8689                radeon_ib_pool_fini(rdev);
8690                radeon_vm_manager_fini(rdev);
8691                radeon_irq_kms_fini(rdev);
8692                cik_pcie_gart_fini(rdev);
8693                rdev->accel_working = false;
8694        }
8695
8696        /* Don't start up if the MC ucode is missing.
8697         * The default clocks and voltages before the MC ucode
8698         * is loaded are not suffient for advanced operations.
8699         */
8700        if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8701                DRM_ERROR("radeon: MC ucode required for NI+.\n");
8702                return -EINVAL;
8703        }
8704
8705        return 0;
8706}
8707
8708/**
8709 * cik_fini - asic specific driver and hw fini
8710 *
8711 * @rdev: radeon_device pointer
8712 *
8713 * Tear down the asic specific driver variables and program the hw
8714 * to an idle state (CIK).
8715 * Called at driver unload.
8716 */
8717void cik_fini(struct radeon_device *rdev)
8718{
8719        radeon_pm_fini(rdev);
8720        cik_cp_fini(rdev);
8721        cik_sdma_fini(rdev);
8722        cik_fini_pg(rdev);
8723        cik_fini_cg(rdev);
8724        cik_irq_fini(rdev);
8725        sumo_rlc_fini(rdev);
8726        cik_mec_fini(rdev);
8727        radeon_wb_fini(rdev);
8728        radeon_vm_manager_fini(rdev);
8729        radeon_ib_pool_fini(rdev);
8730        radeon_irq_kms_fini(rdev);
8731        uvd_v1_0_fini(rdev);
8732        radeon_uvd_fini(rdev);
8733        radeon_vce_fini(rdev);
8734        cik_pcie_gart_fini(rdev);
8735        r600_vram_scratch_fini(rdev);
8736        radeon_gem_fini(rdev);
8737        radeon_fence_driver_fini(rdev);
8738        radeon_bo_fini(rdev);
8739        radeon_atombios_fini(rdev);
8740        kfree(rdev->bios);
8741        rdev->bios = NULL;
8742}
8743
8744void dce8_program_fmt(struct drm_encoder *encoder)
8745{
8746        struct drm_device *dev = encoder->dev;
8747        struct radeon_device *rdev = dev->dev_private;
8748        struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8749        struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8750        struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8751        int bpc = 0;
8752        u32 tmp = 0;
8753        enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8754
8755        if (connector) {
8756                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8757                bpc = radeon_get_monitor_bpc(connector);
8758                dither = radeon_connector->dither;
8759        }
8760
8761        /* LVDS/eDP FMT is set up by atom */
8762        if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8763                return;
8764
8765        /* not needed for analog */
8766        if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8767            (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8768                return;
8769
8770        if (bpc == 0)
8771                return;
8772
8773        switch (bpc) {
8774        case 6:
8775                if (dither == RADEON_FMT_DITHER_ENABLE)
8776                        /* XXX sort out optimal dither settings */
8777                        tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8778                                FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8779                else
8780                        tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8781                break;
8782        case 8:
8783                if (dither == RADEON_FMT_DITHER_ENABLE)
8784                        /* XXX sort out optimal dither settings */
8785                        tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8786                                FMT_RGB_RANDOM_ENABLE |
8787                                FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8788                else
8789                        tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8790                break;
8791        case 10:
8792                if (dither == RADEON_FMT_DITHER_ENABLE)
8793                        /* XXX sort out optimal dither settings */
8794                        tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795                                FMT_RGB_RANDOM_ENABLE |
8796                                FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8797                else
8798                        tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8799                break;
8800        default:
8801                /* not needed */
8802                break;
8803        }
8804
8805        WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8806}
8807
8808/* display watermark setup */
8809/**
8810 * dce8_line_buffer_adjust - Set up the line buffer
8811 *
8812 * @rdev: radeon_device pointer
8813 * @radeon_crtc: the selected display controller
8814 * @mode: the current display mode on the selected display
8815 * controller
8816 *
8817 * Setup up the line buffer allocation for
8818 * the selected display controller (CIK).
8819 * Returns the line buffer size in pixels.
8820 */
8821static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8822                                   struct radeon_crtc *radeon_crtc,
8823                                   struct drm_display_mode *mode)
8824{
8825        u32 tmp, buffer_alloc, i;
8826        u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8827        /*
8828         * Line Buffer Setup
8829         * There are 6 line buffers, one for each display controllers.
8830         * There are 3 partitions per LB. Select the number of partitions
8831         * to enable based on the display width.  For display widths larger
8832         * than 4096, you need use to use 2 display controllers and combine
8833         * them using the stereo blender.
8834         */
8835        if (radeon_crtc->base.enabled && mode) {
8836                if (mode->crtc_hdisplay < 1920) {
8837                        tmp = 1;
8838                        buffer_alloc = 2;
8839                } else if (mode->crtc_hdisplay < 2560) {
8840                        tmp = 2;
8841                        buffer_alloc = 2;
8842                } else if (mode->crtc_hdisplay < 4096) {
8843                        tmp = 0;
8844                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8845                } else {
8846                        DRM_DEBUG_KMS("Mode too big for LB!\n");
8847                        tmp = 0;
8848                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8849                }
8850        } else {
8851                tmp = 1;
8852                buffer_alloc = 0;
8853        }
8854
8855        WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8856               LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8857
8858        WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8859               DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8860        for (i = 0; i < rdev->usec_timeout; i++) {
8861                if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8862                    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8863                        break;
8864                udelay(1);
8865        }
8866
8867        if (radeon_crtc->base.enabled && mode) {
8868                switch (tmp) {
8869                case 0:
8870                default:
8871                        return 4096 * 2;
8872                case 1:
8873                        return 1920 * 2;
8874                case 2:
8875                        return 2560 * 2;
8876                }
8877        }
8878
8879        /* controller not enabled, so no lb used */
8880        return 0;
8881}
8882
8883/**
8884 * cik_get_number_of_dram_channels - get the number of dram channels
8885 *
8886 * @rdev: radeon_device pointer
8887 *
8888 * Look up the number of video ram channels (CIK).
8889 * Used for display watermark bandwidth calculations
8890 * Returns the number of dram channels
8891 */
8892static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8893{
8894        u32 tmp = RREG32(MC_SHARED_CHMAP);
8895
8896        switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8897        case 0:
8898        default:
8899                return 1;
8900        case 1:
8901                return 2;
8902        case 2:
8903                return 4;
8904        case 3:
8905                return 8;
8906        case 4:
8907                return 3;
8908        case 5:
8909                return 6;
8910        case 6:
8911                return 10;
8912        case 7:
8913                return 12;
8914        case 8:
8915                return 16;
8916        }
8917}
8918
8919struct dce8_wm_params {
8920        u32 dram_channels; /* number of dram channels */
8921        u32 yclk;          /* bandwidth per dram data pin in kHz */
8922        u32 sclk;          /* engine clock in kHz */
8923        u32 disp_clk;      /* display clock in kHz */
8924        u32 src_width;     /* viewport width */
8925        u32 active_time;   /* active display time in ns */
8926        u32 blank_time;    /* blank time in ns */
8927        bool interlaced;    /* mode is interlaced */
8928        fixed20_12 vsc;    /* vertical scale ratio */
8929        u32 num_heads;     /* number of active crtcs */
8930        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8931        u32 lb_size;       /* line buffer allocated to pipe */
8932        u32 vtaps;         /* vertical scaler taps */
8933};
8934
8935/**
8936 * dce8_dram_bandwidth - get the dram bandwidth
8937 *
8938 * @wm: watermark calculation data
8939 *
8940 * Calculate the raw dram bandwidth (CIK).
8941 * Used for display watermark bandwidth calculations
8942 * Returns the dram bandwidth in MBytes/s
8943 */
8944static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8945{
8946        /* Calculate raw DRAM Bandwidth */
8947        fixed20_12 dram_efficiency; /* 0.7 */
8948        fixed20_12 yclk, dram_channels, bandwidth;
8949        fixed20_12 a;
8950
8951        a.full = dfixed_const(1000);
8952        yclk.full = dfixed_const(wm->yclk);
8953        yclk.full = dfixed_div(yclk, a);
8954        dram_channels.full = dfixed_const(wm->dram_channels * 4);
8955        a.full = dfixed_const(10);
8956        dram_efficiency.full = dfixed_const(7);
8957        dram_efficiency.full = dfixed_div(dram_efficiency, a);
8958        bandwidth.full = dfixed_mul(dram_channels, yclk);
8959        bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8960
8961        return dfixed_trunc(bandwidth);
8962}
8963
8964/**
8965 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8966 *
8967 * @wm: watermark calculation data
8968 *
8969 * Calculate the dram bandwidth used for display (CIK).
8970 * Used for display watermark bandwidth calculations
8971 * Returns the dram bandwidth for display in MBytes/s
8972 */
8973static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8974{
8975        /* Calculate DRAM Bandwidth and the part allocated to display. */
8976        fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8977        fixed20_12 yclk, dram_channels, bandwidth;
8978        fixed20_12 a;
8979
8980        a.full = dfixed_const(1000);
8981        yclk.full = dfixed_const(wm->yclk);
8982        yclk.full = dfixed_div(yclk, a);
8983        dram_channels.full = dfixed_const(wm->dram_channels * 4);
8984        a.full = dfixed_const(10);
8985        disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8986        disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8987        bandwidth.full = dfixed_mul(dram_channels, yclk);
8988        bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8989
8990        return dfixed_trunc(bandwidth);
8991}
8992
8993/**
8994 * dce8_data_return_bandwidth - get the data return bandwidth
8995 *
8996 * @wm: watermark calculation data
8997 *
8998 * Calculate the data return bandwidth used for display (CIK).
8999 * Used for display watermark bandwidth calculations
9000 * Returns the data return bandwidth in MBytes/s
9001 */
9002static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9003{
9004        /* Calculate the display Data return Bandwidth */
9005        fixed20_12 return_efficiency; /* 0.8 */
9006        fixed20_12 sclk, bandwidth;
9007        fixed20_12 a;
9008
9009        a.full = dfixed_const(1000);
9010        sclk.full = dfixed_const(wm->sclk);
9011        sclk.full = dfixed_div(sclk, a);
9012        a.full = dfixed_const(10);
9013        return_efficiency.full = dfixed_const(8);
9014        return_efficiency.full = dfixed_div(return_efficiency, a);
9015        a.full = dfixed_const(32);
9016        bandwidth.full = dfixed_mul(a, sclk);
9017        bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9018
9019        return dfixed_trunc(bandwidth);
9020}
9021
9022/**
9023 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9024 *
9025 * @wm: watermark calculation data
9026 *
9027 * Calculate the dmif bandwidth used for display (CIK).
9028 * Used for display watermark bandwidth calculations
9029 * Returns the dmif bandwidth in MBytes/s
9030 */
9031static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9032{
9033        /* Calculate the DMIF Request Bandwidth */
9034        fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9035        fixed20_12 disp_clk, bandwidth;
9036        fixed20_12 a, b;
9037
9038        a.full = dfixed_const(1000);
9039        disp_clk.full = dfixed_const(wm->disp_clk);
9040        disp_clk.full = dfixed_div(disp_clk, a);
9041        a.full = dfixed_const(32);
9042        b.full = dfixed_mul(a, disp_clk);
9043
9044        a.full = dfixed_const(10);
9045        disp_clk_request_efficiency.full = dfixed_const(8);
9046        disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9047
9048        bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9049
9050        return dfixed_trunc(bandwidth);
9051}
9052
9053/**
9054 * dce8_available_bandwidth - get the min available bandwidth
9055 *
9056 * @wm: watermark calculation data
9057 *
9058 * Calculate the min available bandwidth used for display (CIK).
9059 * Used for display watermark bandwidth calculations
9060 * Returns the min available bandwidth in MBytes/s
9061 */
9062static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9063{
9064        /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9065        u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9066        u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9067        u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9068
9069        return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9070}
9071
9072/**
9073 * dce8_average_bandwidth - get the average available bandwidth
9074 *
9075 * @wm: watermark calculation data
9076 *
9077 * Calculate the average available bandwidth used for display (CIK).
9078 * Used for display watermark bandwidth calculations
9079 * Returns the average available bandwidth in MBytes/s
9080 */
9081static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9082{
9083        /* Calculate the display mode Average Bandwidth
9084         * DisplayMode should contain the source and destination dimensions,
9085         * timing, etc.
9086         */
9087        fixed20_12 bpp;
9088        fixed20_12 line_time;
9089        fixed20_12 src_width;
9090        fixed20_12 bandwidth;
9091        fixed20_12 a;
9092
9093        a.full = dfixed_const(1000);
9094        line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9095        line_time.full = dfixed_div(line_time, a);
9096        bpp.full = dfixed_const(wm->bytes_per_pixel);
9097        src_width.full = dfixed_const(wm->src_width);
9098        bandwidth.full = dfixed_mul(src_width, bpp);
9099        bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9100        bandwidth.full = dfixed_div(bandwidth, line_time);
9101
9102        return dfixed_trunc(bandwidth);
9103}
9104
9105/**
9106 * dce8_latency_watermark - get the latency watermark
9107 *
9108 * @wm: watermark calculation data
9109 *
9110 * Calculate the latency watermark (CIK).
9111 * Used for display watermark bandwidth calculations
9112 * Returns the latency watermark in ns
9113 */
9114static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9115{
9116        /* First calculate the latency in ns */
9117        u32 mc_latency = 2000; /* 2000 ns. */
9118        u32 available_bandwidth = dce8_available_bandwidth(wm);
9119        u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9120        u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9121        u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9122        u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9123                (wm->num_heads * cursor_line_pair_return_time);
9124        u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9125        u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9126        u32 tmp, dmif_size = 12288;
9127        fixed20_12 a, b, c;
9128
9129        if (wm->num_heads == 0)
9130                return 0;
9131
9132        a.full = dfixed_const(2);
9133        b.full = dfixed_const(1);
9134        if ((wm->vsc.full > a.full) ||
9135            ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9136            (wm->vtaps >= 5) ||
9137            ((wm->vsc.full >= a.full) && wm->interlaced))
9138                max_src_lines_per_dst_line = 4;
9139        else
9140                max_src_lines_per_dst_line = 2;
9141
9142        a.full = dfixed_const(available_bandwidth);
9143        b.full = dfixed_const(wm->num_heads);
9144        a.full = dfixed_div(a, b);
9145        tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9146        tmp = min(dfixed_trunc(a), tmp);
9147
9148        lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9149
9150        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9151        b.full = dfixed_const(1000);
9152        c.full = dfixed_const(lb_fill_bw);
9153        b.full = dfixed_div(c, b);
9154        a.full = dfixed_div(a, b);
9155        line_fill_time = dfixed_trunc(a);
9156
9157        if (line_fill_time < wm->active_time)
9158                return latency;
9159        else
9160                return latency + (line_fill_time - wm->active_time);
9161
9162}
9163
9164/**
9165 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9166 * average and available dram bandwidth
9167 *
9168 * @wm: watermark calculation data
9169 *
9170 * Check if the display average bandwidth fits in the display
9171 * dram bandwidth (CIK).
9172 * Used for display watermark bandwidth calculations
9173 * Returns true if the display fits, false if not.
9174 */
9175static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9176{
9177        if (dce8_average_bandwidth(wm) <=
9178            (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9179                return true;
9180        else
9181                return false;
9182}
9183
9184/**
9185 * dce8_average_bandwidth_vs_available_bandwidth - check
9186 * average and available bandwidth
9187 *
9188 * @wm: watermark calculation data
9189 *
9190 * Check if the display average bandwidth fits in the display
9191 * available bandwidth (CIK).
9192 * Used for display watermark bandwidth calculations
9193 * Returns true if the display fits, false if not.
9194 */
9195static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9196{
9197        if (dce8_average_bandwidth(wm) <=
9198            (dce8_available_bandwidth(wm) / wm->num_heads))
9199                return true;
9200        else
9201                return false;
9202}
9203
9204/**
9205 * dce8_check_latency_hiding - check latency hiding
9206 *
9207 * @wm: watermark calculation data
9208 *
9209 * Check latency hiding (CIK).
9210 * Used for display watermark bandwidth calculations
9211 * Returns true if the display fits, false if not.
9212 */
9213static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9214{
9215        u32 lb_partitions = wm->lb_size / wm->src_width;
9216        u32 line_time = wm->active_time + wm->blank_time;
9217        u32 latency_tolerant_lines;
9218        u32 latency_hiding;
9219        fixed20_12 a;
9220
9221        a.full = dfixed_const(1);
9222        if (wm->vsc.full > a.full)
9223                latency_tolerant_lines = 1;
9224        else {
9225                if (lb_partitions <= (wm->vtaps + 1))
9226                        latency_tolerant_lines = 1;
9227                else
9228                        latency_tolerant_lines = 2;
9229        }
9230
9231        latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9232
9233        if (dce8_latency_watermark(wm) <= latency_hiding)
9234                return true;
9235        else
9236                return false;
9237}
9238
9239/**
9240 * dce8_program_watermarks - program display watermarks
9241 *
9242 * @rdev: radeon_device pointer
9243 * @radeon_crtc: the selected display controller
9244 * @lb_size: line buffer size
9245 * @num_heads: number of display controllers in use
9246 *
9247 * Calculate and program the display watermarks for the
9248 * selected display controller (CIK).
9249 */
9250static void dce8_program_watermarks(struct radeon_device *rdev,
9251                                    struct radeon_crtc *radeon_crtc,
9252                                    u32 lb_size, u32 num_heads)
9253{
9254        struct drm_display_mode *mode = &radeon_crtc->base.mode;
9255        struct dce8_wm_params wm_low, wm_high;
9256        u32 active_time;
9257        u32 line_time = 0;
9258        u32 latency_watermark_a = 0, latency_watermark_b = 0;
9259        u32 tmp, wm_mask;
9260
9261        if (radeon_crtc->base.enabled && num_heads && mode) {
9262                active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9263                                            (u32)mode->clock);
9264                line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9265                                          (u32)mode->clock);
9266                line_time = min(line_time, (u32)65535);
9267
9268                /* watermark for high clocks */
9269                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9270                    rdev->pm.dpm_enabled) {
9271                        wm_high.yclk =
9272                                radeon_dpm_get_mclk(rdev, false) * 10;
9273                        wm_high.sclk =
9274                                radeon_dpm_get_sclk(rdev, false) * 10;
9275                } else {
9276                        wm_high.yclk = rdev->pm.current_mclk * 10;
9277                        wm_high.sclk = rdev->pm.current_sclk * 10;
9278                }
9279
9280                wm_high.disp_clk = mode->clock;
9281                wm_high.src_width = mode->crtc_hdisplay;
9282                wm_high.active_time = active_time;
9283                wm_high.blank_time = line_time - wm_high.active_time;
9284                wm_high.interlaced = false;
9285                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9286                        wm_high.interlaced = true;
9287                wm_high.vsc = radeon_crtc->vsc;
9288                wm_high.vtaps = 1;
9289                if (radeon_crtc->rmx_type != RMX_OFF)
9290                        wm_high.vtaps = 2;
9291                wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9292                wm_high.lb_size = lb_size;
9293                wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9294                wm_high.num_heads = num_heads;
9295
9296                /* set for high clocks */
9297                latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9298
9299                /* possibly force display priority to high */
9300                /* should really do this at mode validation time... */
9301                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9302                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9303                    !dce8_check_latency_hiding(&wm_high) ||
9304                    (rdev->disp_priority == 2)) {
9305                        DRM_DEBUG_KMS("force priority to high\n");
9306                }
9307
9308                /* watermark for low clocks */
9309                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9310                    rdev->pm.dpm_enabled) {
9311                        wm_low.yclk =
9312                                radeon_dpm_get_mclk(rdev, true) * 10;
9313                        wm_low.sclk =
9314                                radeon_dpm_get_sclk(rdev, true) * 10;
9315                } else {
9316                        wm_low.yclk = rdev->pm.current_mclk * 10;
9317                        wm_low.sclk = rdev->pm.current_sclk * 10;
9318                }
9319
9320                wm_low.disp_clk = mode->clock;
9321                wm_low.src_width = mode->crtc_hdisplay;
9322                wm_low.active_time = active_time;
9323                wm_low.blank_time = line_time - wm_low.active_time;
9324                wm_low.interlaced = false;
9325                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9326                        wm_low.interlaced = true;
9327                wm_low.vsc = radeon_crtc->vsc;
9328                wm_low.vtaps = 1;
9329                if (radeon_crtc->rmx_type != RMX_OFF)
9330                        wm_low.vtaps = 2;
9331                wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9332                wm_low.lb_size = lb_size;
9333                wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9334                wm_low.num_heads = num_heads;
9335
9336                /* set for low clocks */
9337                latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9338
9339                /* possibly force display priority to high */
9340                /* should really do this at mode validation time... */
9341                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9342                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9343                    !dce8_check_latency_hiding(&wm_low) ||
9344                    (rdev->disp_priority == 2)) {
9345                        DRM_DEBUG_KMS("force priority to high\n");
9346                }
9347
9348                /* Save number of lines the linebuffer leads before the scanout */
9349                radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9350        }
9351
9352        /* select wm A */
9353        wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9354        tmp = wm_mask;
9355        tmp &= ~LATENCY_WATERMARK_MASK(3);
9356        tmp |= LATENCY_WATERMARK_MASK(1);
9357        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9358        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9359               (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9360                LATENCY_HIGH_WATERMARK(line_time)));
9361        /* select wm B */
9362        tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9363        tmp &= ~LATENCY_WATERMARK_MASK(3);
9364        tmp |= LATENCY_WATERMARK_MASK(2);
9365        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9366        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9367               (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9368                LATENCY_HIGH_WATERMARK(line_time)));
9369        /* restore original selection */
9370        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9371
9372        /* save values for DPM */
9373        radeon_crtc->line_time = line_time;
9374        radeon_crtc->wm_high = latency_watermark_a;
9375        radeon_crtc->wm_low = latency_watermark_b;
9376}
9377
9378/**
9379 * dce8_bandwidth_update - program display watermarks
9380 *
9381 * @rdev: radeon_device pointer
9382 *
9383 * Calculate and program the display watermarks and line
9384 * buffer allocation (CIK).
9385 */
9386void dce8_bandwidth_update(struct radeon_device *rdev)
9387{
9388        struct drm_display_mode *mode = NULL;
9389        u32 num_heads = 0, lb_size;
9390        int i;
9391
9392        if (!rdev->mode_info.mode_config_initialized)
9393                return;
9394
9395        radeon_update_display_priority(rdev);
9396
9397        for (i = 0; i < rdev->num_crtc; i++) {
9398                if (rdev->mode_info.crtcs[i]->base.enabled)
9399                        num_heads++;
9400        }
9401        for (i = 0; i < rdev->num_crtc; i++) {
9402                mode = &rdev->mode_info.crtcs[i]->base.mode;
9403                lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9404                dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9405        }
9406}
9407
9408/**
9409 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9410 *
9411 * @rdev: radeon_device pointer
9412 *
9413 * Fetches a GPU clock counter snapshot (SI).
9414 * Returns the 64 bit clock counter snapshot.
9415 */
9416uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9417{
9418        uint64_t clock;
9419
9420        mutex_lock(&rdev->gpu_clock_mutex);
9421        WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9422        clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9423                ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9424        mutex_unlock(&rdev->gpu_clock_mutex);
9425        return clock;
9426}
9427
9428static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9429                             u32 cntl_reg, u32 status_reg)
9430{
9431        int r, i;
9432        struct atom_clock_dividers dividers;
9433        uint32_t tmp;
9434
9435        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9436                                           clock, false, &dividers);
9437        if (r)
9438                return r;
9439
9440        tmp = RREG32_SMC(cntl_reg);
9441        tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9442        tmp |= dividers.post_divider;
9443        WREG32_SMC(cntl_reg, tmp);
9444
9445        for (i = 0; i < 100; i++) {
9446                if (RREG32_SMC(status_reg) & DCLK_STATUS)
9447                        break;
9448                mdelay(10);
9449        }
9450        if (i == 100)
9451                return -ETIMEDOUT;
9452
9453        return 0;
9454}
9455
9456int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9457{
9458        int r = 0;
9459
9460        r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9461        if (r)
9462                return r;
9463
9464        r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9465        return r;
9466}
9467
9468int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9469{
9470        int r, i;
9471        struct atom_clock_dividers dividers;
9472        u32 tmp;
9473
9474        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9475                                           ecclk, false, &dividers);
9476        if (r)
9477                return r;
9478
9479        for (i = 0; i < 100; i++) {
9480                if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9481                        break;
9482                mdelay(10);
9483        }
9484        if (i == 100)
9485                return -ETIMEDOUT;
9486
9487        tmp = RREG32_SMC(CG_ECLK_CNTL);
9488        tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9489        tmp |= dividers.post_divider;
9490        WREG32_SMC(CG_ECLK_CNTL, tmp);
9491
9492        for (i = 0; i < 100; i++) {
9493                if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9494                        break;
9495                mdelay(10);
9496        }
9497        if (i == 100)
9498                return -ETIMEDOUT;
9499
9500        return 0;
9501}
9502
9503static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9504{
9505        struct pci_dev *root = rdev->pdev->bus->self;
9506        enum pci_bus_speed speed_cap;
9507        int bridge_pos, gpu_pos;
9508        u32 speed_cntl, current_data_rate;
9509        int i;
9510        u16 tmp16;
9511
9512        if (pci_is_root_bus(rdev->pdev->bus))
9513                return;
9514
9515        if (radeon_pcie_gen2 == 0)
9516                return;
9517
9518        if (rdev->flags & RADEON_IS_IGP)
9519                return;
9520
9521        if (!(rdev->flags & RADEON_IS_PCIE))
9522                return;
9523
9524        speed_cap = pcie_get_speed_cap(root);
9525        if (speed_cap == PCI_SPEED_UNKNOWN)
9526                return;
9527
9528        if ((speed_cap != PCIE_SPEED_8_0GT) &&
9529            (speed_cap != PCIE_SPEED_5_0GT))
9530                return;
9531
9532        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9533        current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9534                LC_CURRENT_DATA_RATE_SHIFT;
9535        if (speed_cap == PCIE_SPEED_8_0GT) {
9536                if (current_data_rate == 2) {
9537                        DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9538                        return;
9539                }
9540                DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9541        } else if (speed_cap == PCIE_SPEED_5_0GT) {
9542                if (current_data_rate == 1) {
9543                        DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9544                        return;
9545                }
9546                DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9547        }
9548
9549        bridge_pos = pci_pcie_cap(root);
9550        if (!bridge_pos)
9551                return;
9552
9553        gpu_pos = pci_pcie_cap(rdev->pdev);
9554        if (!gpu_pos)
9555                return;
9556
9557        if (speed_cap == PCIE_SPEED_8_0GT) {
9558                /* re-try equalization if gen3 is not already enabled */
9559                if (current_data_rate != 2) {
9560                        u16 bridge_cfg, gpu_cfg;
9561                        u16 bridge_cfg2, gpu_cfg2;
9562                        u32 max_lw, current_lw, tmp;
9563
9564                        pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9565                        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9566
9567                        tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9568                        pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9569
9570                        tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9571                        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9572
9573                        tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9574                        max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9575                        current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9576
9577                        if (current_lw < max_lw) {
9578                                tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9579                                if (tmp & LC_RENEGOTIATION_SUPPORT) {
9580                                        tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9581                                        tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9582                                        tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9583                                        WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9584                                }
9585                        }
9586
9587                        for (i = 0; i < 10; i++) {
9588                                /* check status */
9589                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9590                                if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9591                                        break;
9592
9593                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9594                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9595
9596                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9597                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9598
9599                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9600                                tmp |= LC_SET_QUIESCE;
9601                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9602
9603                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9604                                tmp |= LC_REDO_EQ;
9605                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9606
9607                                msleep(100);
9608
9609                                /* linkctl */
9610                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9611                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9612                                tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9613                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9614
9615                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9616                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9617                                tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9618                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9619
9620                                /* linkctl2 */
9621                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9622                                tmp16 &= ~((1 << 4) | (7 << 9));
9623                                tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9624                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9625
9626                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9627                                tmp16 &= ~((1 << 4) | (7 << 9));
9628                                tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9629                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9630
9631                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9632                                tmp &= ~LC_SET_QUIESCE;
9633                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9634                        }
9635                }
9636        }
9637
9638        /* set the link speed */
9639        speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9640        speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9641        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9642
9643        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9644        tmp16 &= ~0xf;
9645        if (speed_cap == PCIE_SPEED_8_0GT)
9646                tmp16 |= 3; /* gen3 */
9647        else if (speed_cap == PCIE_SPEED_5_0GT)
9648                tmp16 |= 2; /* gen2 */
9649        else
9650                tmp16 |= 1; /* gen1 */
9651        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9652
9653        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9654        speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9655        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9656
9657        for (i = 0; i < rdev->usec_timeout; i++) {
9658                speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9659                if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9660                        break;
9661                udelay(1);
9662        }
9663}
9664
9665static void cik_program_aspm(struct radeon_device *rdev)
9666{
9667        u32 data, orig;
9668        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9669        bool disable_clkreq = false;
9670
9671        if (radeon_aspm == 0)
9672                return;
9673
9674        /* XXX double check IGPs */
9675        if (rdev->flags & RADEON_IS_IGP)
9676                return;
9677
9678        if (!(rdev->flags & RADEON_IS_PCIE))
9679                return;
9680
9681        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9682        data &= ~LC_XMIT_N_FTS_MASK;
9683        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9684        if (orig != data)
9685                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9686
9687        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9688        data |= LC_GO_TO_RECOVERY;
9689        if (orig != data)
9690                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9691
9692        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9693        data |= P_IGNORE_EDB_ERR;
9694        if (orig != data)
9695                WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9696
9697        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9698        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9699        data |= LC_PMI_TO_L1_DIS;
9700        if (!disable_l0s)
9701                data |= LC_L0S_INACTIVITY(7);
9702
9703        if (!disable_l1) {
9704                data |= LC_L1_INACTIVITY(7);
9705                data &= ~LC_PMI_TO_L1_DIS;
9706                if (orig != data)
9707                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9708
9709                if (!disable_plloff_in_l1) {
9710                        bool clk_req_support;
9711
9712                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9713                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9714                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9715                        if (orig != data)
9716                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9717
9718                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9719                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9720                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9721                        if (orig != data)
9722                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9723
9724                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9725                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9726                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9727                        if (orig != data)
9728                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9729
9730                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9731                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9732                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9733                        if (orig != data)
9734                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9735
9736                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9737                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9738                        data |= LC_DYN_LANES_PWR_STATE(3);
9739                        if (orig != data)
9740                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9741
9742                        if (!disable_clkreq &&
9743                            !pci_is_root_bus(rdev->pdev->bus)) {
9744                                struct pci_dev *root = rdev->pdev->bus->self;
9745                                u32 lnkcap;
9746
9747                                clk_req_support = false;
9748                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9749                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9750                                        clk_req_support = true;
9751                        } else {
9752                                clk_req_support = false;
9753                        }
9754
9755                        if (clk_req_support) {
9756                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9757                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9758                                if (orig != data)
9759                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9760
9761                                orig = data = RREG32_SMC(THM_CLK_CNTL);
9762                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9763                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9764                                if (orig != data)
9765                                        WREG32_SMC(THM_CLK_CNTL, data);
9766
9767                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
9768                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9769                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9770                                if (orig != data)
9771                                        WREG32_SMC(MISC_CLK_CTRL, data);
9772
9773                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9774                                data &= ~BCLK_AS_XCLK;
9775                                if (orig != data)
9776                                        WREG32_SMC(CG_CLKPIN_CNTL, data);
9777
9778                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9779                                data &= ~FORCE_BIF_REFCLK_EN;
9780                                if (orig != data)
9781                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9782
9783                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9784                                data &= ~MPLL_CLKOUT_SEL_MASK;
9785                                data |= MPLL_CLKOUT_SEL(4);
9786                                if (orig != data)
9787                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9788                        }
9789                }
9790        } else {
9791                if (orig != data)
9792                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9793        }
9794
9795        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9796        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9797        if (orig != data)
9798                WREG32_PCIE_PORT(PCIE_CNTL2, data);
9799
9800        if (!disable_l0s) {
9801                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9802                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9803                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9804                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9805                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9806                                data &= ~LC_L0S_INACTIVITY_MASK;
9807                                if (orig != data)
9808                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9809                        }
9810                }
9811        }
9812}
9813