/* linux/drivers/gpu/drm/radeon/si.c */
   1/*
   2 * Copyright 2011 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Alex Deucher
  23 */
  24
  25#include <linux/firmware.h>
  26#include <linux/module.h>
  27#include <linux/pci.h>
  28#include <linux/slab.h>
  29
  30#include <drm/drm_vblank.h>
  31#include <drm/radeon_drm.h>
  32
  33#include "atom.h"
  34#include "clearstate_si.h"
  35#include "evergreen.h"
  36#include "r600.h"
  37#include "radeon.h"
  38#include "radeon_asic.h"
  39#include "radeon_audio.h"
  40#include "radeon_ucode.h"
  41#include "si_blit_shaders.h"
  42#include "si.h"
  43#include "sid.h"
  44
  45
  46MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
  47MODULE_FIRMWARE("radeon/TAHITI_me.bin");
  48MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
  49MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
  50MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
  51MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
  52MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
  53
  54MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
  55MODULE_FIRMWARE("radeon/tahiti_me.bin");
  56MODULE_FIRMWARE("radeon/tahiti_ce.bin");
  57MODULE_FIRMWARE("radeon/tahiti_mc.bin");
  58MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
  59MODULE_FIRMWARE("radeon/tahiti_smc.bin");
  60
  61MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
  62MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
  63MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
  64MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
  65MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
  66MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
  67MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
  68
  69MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
  70MODULE_FIRMWARE("radeon/pitcairn_me.bin");
  71MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
  72MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
  73MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
  74MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
  75MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
  76
  77MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
  78MODULE_FIRMWARE("radeon/VERDE_me.bin");
  79MODULE_FIRMWARE("radeon/VERDE_ce.bin");
  80MODULE_FIRMWARE("radeon/VERDE_mc.bin");
  81MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
  82MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
  83MODULE_FIRMWARE("radeon/VERDE_smc.bin");
  84
  85MODULE_FIRMWARE("radeon/verde_pfp.bin");
  86MODULE_FIRMWARE("radeon/verde_me.bin");
  87MODULE_FIRMWARE("radeon/verde_ce.bin");
  88MODULE_FIRMWARE("radeon/verde_mc.bin");
  89MODULE_FIRMWARE("radeon/verde_rlc.bin");
  90MODULE_FIRMWARE("radeon/verde_smc.bin");
  91MODULE_FIRMWARE("radeon/verde_k_smc.bin");
  92
  93MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
  94MODULE_FIRMWARE("radeon/OLAND_me.bin");
  95MODULE_FIRMWARE("radeon/OLAND_ce.bin");
  96MODULE_FIRMWARE("radeon/OLAND_mc.bin");
  97MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
  98MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
  99MODULE_FIRMWARE("radeon/OLAND_smc.bin");
 100
 101MODULE_FIRMWARE("radeon/oland_pfp.bin");
 102MODULE_FIRMWARE("radeon/oland_me.bin");
 103MODULE_FIRMWARE("radeon/oland_ce.bin");
 104MODULE_FIRMWARE("radeon/oland_mc.bin");
 105MODULE_FIRMWARE("radeon/oland_rlc.bin");
 106MODULE_FIRMWARE("radeon/oland_smc.bin");
 107MODULE_FIRMWARE("radeon/oland_k_smc.bin");
 108
 109MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
 110MODULE_FIRMWARE("radeon/HAINAN_me.bin");
 111MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
 112MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
 113MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
 114MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
 115MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
 116
 117MODULE_FIRMWARE("radeon/hainan_pfp.bin");
 118MODULE_FIRMWARE("radeon/hainan_me.bin");
 119MODULE_FIRMWARE("radeon/hainan_ce.bin");
 120MODULE_FIRMWARE("radeon/hainan_mc.bin");
 121MODULE_FIRMWARE("radeon/hainan_rlc.bin");
 122MODULE_FIRMWARE("radeon/hainan_smc.bin");
 123MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
 124MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
 125
 126MODULE_FIRMWARE("radeon/si58_mc.bin");
 127
 128static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 129static void si_pcie_gen3_enable(struct radeon_device *rdev);
 130static void si_program_aspm(struct radeon_device *rdev);
 131extern void sumo_rlc_fini(struct radeon_device *rdev);
 132extern int sumo_rlc_init(struct radeon_device *rdev);
 133static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
 134                                         bool enable);
 135static void si_init_pg(struct radeon_device *rdev);
 136static void si_init_cg(struct radeon_device *rdev);
 137static void si_fini_pg(struct radeon_device *rdev);
 138static void si_fini_cg(struct radeon_device *rdev);
 139static void si_rlc_stop(struct radeon_device *rdev);
 140
/*
 * MMIO register-block offsets for the six display controllers; indexed by
 * CRTC id so per-CRTC registers can be addressed as base + crtc_offsets[i].
 */
static const u32 crtc_offsets[] =
{
	EVERGREEN_CRTC0_REGISTER_OFFSET,
	EVERGREEN_CRTC1_REGISTER_OFFSET,
	EVERGREEN_CRTC2_REGISTER_OFFSET,
	EVERGREEN_CRTC3_REGISTER_OFFSET,
	EVERGREEN_CRTC4_REGISTER_OFFSET,
	EVERGREEN_CRTC5_REGISTER_OFFSET
};
 150
/*
 * Display interrupt status registers, one per CRTC, in the same order as
 * crtc_offsets[] so both tables can be indexed by the same CRTC id.
 */
static const u32 si_disp_int_status[] =
{
	DISP_INTERRUPT_STATUS,
	DISP_INTERRUPT_STATUS_CONTINUE,
	DISP_INTERRUPT_STATUS_CONTINUE2,
	DISP_INTERRUPT_STATUS_CONTINUE3,
	DISP_INTERRUPT_STATUS_CONTINUE4,
	DISP_INTERRUPT_STATUS_CONTINUE5
};
 160
 161#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + (x * 0xc))
 162#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + (x * 0xc))
 163#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + (x * 0xc))
 164
 165static const u32 verde_rlc_save_restore_register_list[] =
 166{
 167        (0x8000 << 16) | (0x98f4 >> 2),
 168        0x00000000,
 169        (0x8040 << 16) | (0x98f4 >> 2),
 170        0x00000000,
 171        (0x8000 << 16) | (0xe80 >> 2),
 172        0x00000000,
 173        (0x8040 << 16) | (0xe80 >> 2),
 174        0x00000000,
 175        (0x8000 << 16) | (0x89bc >> 2),
 176        0x00000000,
 177        (0x8040 << 16) | (0x89bc >> 2),
 178        0x00000000,
 179        (0x8000 << 16) | (0x8c1c >> 2),
 180        0x00000000,
 181        (0x8040 << 16) | (0x8c1c >> 2),
 182        0x00000000,
 183        (0x9c00 << 16) | (0x98f0 >> 2),
 184        0x00000000,
 185        (0x9c00 << 16) | (0xe7c >> 2),
 186        0x00000000,
 187        (0x8000 << 16) | (0x9148 >> 2),
 188        0x00000000,
 189        (0x8040 << 16) | (0x9148 >> 2),
 190        0x00000000,
 191        (0x9c00 << 16) | (0x9150 >> 2),
 192        0x00000000,
 193        (0x9c00 << 16) | (0x897c >> 2),
 194        0x00000000,
 195        (0x9c00 << 16) | (0x8d8c >> 2),
 196        0x00000000,
 197        (0x9c00 << 16) | (0xac54 >> 2),
 198        0X00000000,
 199        0x3,
 200        (0x9c00 << 16) | (0x98f8 >> 2),
 201        0x00000000,
 202        (0x9c00 << 16) | (0x9910 >> 2),
 203        0x00000000,
 204        (0x9c00 << 16) | (0x9914 >> 2),
 205        0x00000000,
 206        (0x9c00 << 16) | (0x9918 >> 2),
 207        0x00000000,
 208        (0x9c00 << 16) | (0x991c >> 2),
 209        0x00000000,
 210        (0x9c00 << 16) | (0x9920 >> 2),
 211        0x00000000,
 212        (0x9c00 << 16) | (0x9924 >> 2),
 213        0x00000000,
 214        (0x9c00 << 16) | (0x9928 >> 2),
 215        0x00000000,
 216        (0x9c00 << 16) | (0x992c >> 2),
 217        0x00000000,
 218        (0x9c00 << 16) | (0x9930 >> 2),
 219        0x00000000,
 220        (0x9c00 << 16) | (0x9934 >> 2),
 221        0x00000000,
 222        (0x9c00 << 16) | (0x9938 >> 2),
 223        0x00000000,
 224        (0x9c00 << 16) | (0x993c >> 2),
 225        0x00000000,
 226        (0x9c00 << 16) | (0x9940 >> 2),
 227        0x00000000,
 228        (0x9c00 << 16) | (0x9944 >> 2),
 229        0x00000000,
 230        (0x9c00 << 16) | (0x9948 >> 2),
 231        0x00000000,
 232        (0x9c00 << 16) | (0x994c >> 2),
 233        0x00000000,
 234        (0x9c00 << 16) | (0x9950 >> 2),
 235        0x00000000,
 236        (0x9c00 << 16) | (0x9954 >> 2),
 237        0x00000000,
 238        (0x9c00 << 16) | (0x9958 >> 2),
 239        0x00000000,
 240        (0x9c00 << 16) | (0x995c >> 2),
 241        0x00000000,
 242        (0x9c00 << 16) | (0x9960 >> 2),
 243        0x00000000,
 244        (0x9c00 << 16) | (0x9964 >> 2),
 245        0x00000000,
 246        (0x9c00 << 16) | (0x9968 >> 2),
 247        0x00000000,
 248        (0x9c00 << 16) | (0x996c >> 2),
 249        0x00000000,
 250        (0x9c00 << 16) | (0x9970 >> 2),
 251        0x00000000,
 252        (0x9c00 << 16) | (0x9974 >> 2),
 253        0x00000000,
 254        (0x9c00 << 16) | (0x9978 >> 2),
 255        0x00000000,
 256        (0x9c00 << 16) | (0x997c >> 2),
 257        0x00000000,
 258        (0x9c00 << 16) | (0x9980 >> 2),
 259        0x00000000,
 260        (0x9c00 << 16) | (0x9984 >> 2),
 261        0x00000000,
 262        (0x9c00 << 16) | (0x9988 >> 2),
 263        0x00000000,
 264        (0x9c00 << 16) | (0x998c >> 2),
 265        0x00000000,
 266        (0x9c00 << 16) | (0x8c00 >> 2),
 267        0x00000000,
 268        (0x9c00 << 16) | (0x8c14 >> 2),
 269        0x00000000,
 270        (0x9c00 << 16) | (0x8c04 >> 2),
 271        0x00000000,
 272        (0x9c00 << 16) | (0x8c08 >> 2),
 273        0x00000000,
 274        (0x8000 << 16) | (0x9b7c >> 2),
 275        0x00000000,
 276        (0x8040 << 16) | (0x9b7c >> 2),
 277        0x00000000,
 278        (0x8000 << 16) | (0xe84 >> 2),
 279        0x00000000,
 280        (0x8040 << 16) | (0xe84 >> 2),
 281        0x00000000,
 282        (0x8000 << 16) | (0x89c0 >> 2),
 283        0x00000000,
 284        (0x8040 << 16) | (0x89c0 >> 2),
 285        0x00000000,
 286        (0x8000 << 16) | (0x914c >> 2),
 287        0x00000000,
 288        (0x8040 << 16) | (0x914c >> 2),
 289        0x00000000,
 290        (0x8000 << 16) | (0x8c20 >> 2),
 291        0x00000000,
 292        (0x8040 << 16) | (0x8c20 >> 2),
 293        0x00000000,
 294        (0x8000 << 16) | (0x9354 >> 2),
 295        0x00000000,
 296        (0x8040 << 16) | (0x9354 >> 2),
 297        0x00000000,
 298        (0x9c00 << 16) | (0x9060 >> 2),
 299        0x00000000,
 300        (0x9c00 << 16) | (0x9364 >> 2),
 301        0x00000000,
 302        (0x9c00 << 16) | (0x9100 >> 2),
 303        0x00000000,
 304        (0x9c00 << 16) | (0x913c >> 2),
 305        0x00000000,
 306        (0x8000 << 16) | (0x90e0 >> 2),
 307        0x00000000,
 308        (0x8000 << 16) | (0x90e4 >> 2),
 309        0x00000000,
 310        (0x8000 << 16) | (0x90e8 >> 2),
 311        0x00000000,
 312        (0x8040 << 16) | (0x90e0 >> 2),
 313        0x00000000,
 314        (0x8040 << 16) | (0x90e4 >> 2),
 315        0x00000000,
 316        (0x8040 << 16) | (0x90e8 >> 2),
 317        0x00000000,
 318        (0x9c00 << 16) | (0x8bcc >> 2),
 319        0x00000000,
 320        (0x9c00 << 16) | (0x8b24 >> 2),
 321        0x00000000,
 322        (0x9c00 << 16) | (0x88c4 >> 2),
 323        0x00000000,
 324        (0x9c00 << 16) | (0x8e50 >> 2),
 325        0x00000000,
 326        (0x9c00 << 16) | (0x8c0c >> 2),
 327        0x00000000,
 328        (0x9c00 << 16) | (0x8e58 >> 2),
 329        0x00000000,
 330        (0x9c00 << 16) | (0x8e5c >> 2),
 331        0x00000000,
 332        (0x9c00 << 16) | (0x9508 >> 2),
 333        0x00000000,
 334        (0x9c00 << 16) | (0x950c >> 2),
 335        0x00000000,
 336        (0x9c00 << 16) | (0x9494 >> 2),
 337        0x00000000,
 338        (0x9c00 << 16) | (0xac0c >> 2),
 339        0x00000000,
 340        (0x9c00 << 16) | (0xac10 >> 2),
 341        0x00000000,
 342        (0x9c00 << 16) | (0xac14 >> 2),
 343        0x00000000,
 344        (0x9c00 << 16) | (0xae00 >> 2),
 345        0x00000000,
 346        (0x9c00 << 16) | (0xac08 >> 2),
 347        0x00000000,
 348        (0x9c00 << 16) | (0x88d4 >> 2),
 349        0x00000000,
 350        (0x9c00 << 16) | (0x88c8 >> 2),
 351        0x00000000,
 352        (0x9c00 << 16) | (0x88cc >> 2),
 353        0x00000000,
 354        (0x9c00 << 16) | (0x89b0 >> 2),
 355        0x00000000,
 356        (0x9c00 << 16) | (0x8b10 >> 2),
 357        0x00000000,
 358        (0x9c00 << 16) | (0x8a14 >> 2),
 359        0x00000000,
 360        (0x9c00 << 16) | (0x9830 >> 2),
 361        0x00000000,
 362        (0x9c00 << 16) | (0x9834 >> 2),
 363        0x00000000,
 364        (0x9c00 << 16) | (0x9838 >> 2),
 365        0x00000000,
 366        (0x9c00 << 16) | (0x9a10 >> 2),
 367        0x00000000,
 368        (0x8000 << 16) | (0x9870 >> 2),
 369        0x00000000,
 370        (0x8000 << 16) | (0x9874 >> 2),
 371        0x00000000,
 372        (0x8001 << 16) | (0x9870 >> 2),
 373        0x00000000,
 374        (0x8001 << 16) | (0x9874 >> 2),
 375        0x00000000,
 376        (0x8040 << 16) | (0x9870 >> 2),
 377        0x00000000,
 378        (0x8040 << 16) | (0x9874 >> 2),
 379        0x00000000,
 380        (0x8041 << 16) | (0x9870 >> 2),
 381        0x00000000,
 382        (0x8041 << 16) | (0x9874 >> 2),
 383        0x00000000,
 384        0x00000000
 385};
 386
/*
 * Tahiti RLC golden register settings.
 * NOTE(review): entries appear to be {reg offset, AND mask, OR value}
 * triples, matching the other *_golden_* tables in this file — confirm
 * against the register-programming consumer.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
 396
/*
 * Tahiti golden register settings, as {reg offset, mask, value} triples
 * applied during asic init.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
 431
/* Second Tahiti golden register set; same {reg, mask, value} triple layout. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
 436
/* Pitcairn RLC golden register settings ({reg, mask, value} triples). */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
 445
/* Pitcairn golden register settings ({reg, mask, value} triples). */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
 476
/* Verde RLC golden register settings ({reg, mask, value} triples). */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
 485
/*
 * Verde golden register settings ({reg, mask, value} triples).
 * Several entries are intentionally repeated in this table; the repeats
 * are preserved as-is — do not deduplicate without confirming the
 * hardware requirement.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
 541
/* Oland RLC golden register settings ({reg, mask, value} triples). */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
 550
/* Oland golden register settings ({reg, mask, value} triples). */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
 581
/* Hainan golden register settings ({reg, mask, value} triples). */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
 610
/* Second Hainan golden register set; same {reg, mask, value} triple layout. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
 615
/*
 * Tahiti medium-grain / coarse-grain clock-gating (MGCG/CGCG) register
 * init sequence, stored as {reg offset, mask, value} triples like the
 * golden-register tables above.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
 745
/*
 * Pitcairn MGCG/CGCG clock-gating register init sequence
 * ({reg offset, mask, value} triples, same layout as the Tahiti table).
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
 843
/*
 * MGCG/CGCG (clock-gating) init sequence for Verde: {offset, mask, value}
 * triples applied by radeon_program_register_sequence() from
 * si_init_golden_registers().
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
 943
/*
 * MGCG/CGCG (clock-gating) init sequence for Oland: {offset, mask, value}
 * triples applied by radeon_program_register_sequence() from
 * si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1023
/*
 * MGCG/CGCG (clock-gating) init sequence for Hainan: {offset, mask, value}
 * triples applied by radeon_program_register_sequence() from
 * si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1100
1101static u32 verde_pg_init[] =
1102{
1103        0x353c, 0xffffffff, 0x40000,
1104        0x3538, 0xffffffff, 0x200010ff,
1105        0x353c, 0xffffffff, 0x0,
1106        0x353c, 0xffffffff, 0x0,
1107        0x353c, 0xffffffff, 0x0,
1108        0x353c, 0xffffffff, 0x0,
1109        0x353c, 0xffffffff, 0x0,
1110        0x353c, 0xffffffff, 0x7007,
1111        0x3538, 0xffffffff, 0x300010ff,
1112        0x353c, 0xffffffff, 0x0,
1113        0x353c, 0xffffffff, 0x0,
1114        0x353c, 0xffffffff, 0x0,
1115        0x353c, 0xffffffff, 0x0,
1116        0x353c, 0xffffffff, 0x0,
1117        0x353c, 0xffffffff, 0x400000,
1118        0x3538, 0xffffffff, 0x100010ff,
1119        0x353c, 0xffffffff, 0x0,
1120        0x353c, 0xffffffff, 0x0,
1121        0x353c, 0xffffffff, 0x0,
1122        0x353c, 0xffffffff, 0x0,
1123        0x353c, 0xffffffff, 0x0,
1124        0x353c, 0xffffffff, 0x120200,
1125        0x3538, 0xffffffff, 0x500010ff,
1126        0x353c, 0xffffffff, 0x0,
1127        0x353c, 0xffffffff, 0x0,
1128        0x353c, 0xffffffff, 0x0,
1129        0x353c, 0xffffffff, 0x0,
1130        0x353c, 0xffffffff, 0x0,
1131        0x353c, 0xffffffff, 0x1e1e16,
1132        0x3538, 0xffffffff, 0x600010ff,
1133        0x353c, 0xffffffff, 0x0,
1134        0x353c, 0xffffffff, 0x0,
1135        0x353c, 0xffffffff, 0x0,
1136        0x353c, 0xffffffff, 0x0,
1137        0x353c, 0xffffffff, 0x0,
1138        0x353c, 0xffffffff, 0x171f1e,
1139        0x3538, 0xffffffff, 0x700010ff,
1140        0x353c, 0xffffffff, 0x0,
1141        0x353c, 0xffffffff, 0x0,
1142        0x353c, 0xffffffff, 0x0,
1143        0x353c, 0xffffffff, 0x0,
1144        0x353c, 0xffffffff, 0x0,
1145        0x353c, 0xffffffff, 0x0,
1146        0x3538, 0xffffffff, 0x9ff,
1147        0x3500, 0xffffffff, 0x0,
1148        0x3504, 0xffffffff, 0x10000800,
1149        0x3504, 0xffffffff, 0xf,
1150        0x3504, 0xffffffff, 0xf,
1151        0x3500, 0xffffffff, 0x4,
1152        0x3504, 0xffffffff, 0x1000051e,
1153        0x3504, 0xffffffff, 0xffff,
1154        0x3504, 0xffffffff, 0xffff,
1155        0x3500, 0xffffffff, 0x8,
1156        0x3504, 0xffffffff, 0x80500,
1157        0x3500, 0xffffffff, 0x12,
1158        0x3504, 0xffffffff, 0x9050c,
1159        0x3500, 0xffffffff, 0x1d,
1160        0x3504, 0xffffffff, 0xb052c,
1161        0x3500, 0xffffffff, 0x2a,
1162        0x3504, 0xffffffff, 0x1053e,
1163        0x3500, 0xffffffff, 0x2d,
1164        0x3504, 0xffffffff, 0x10546,
1165        0x3500, 0xffffffff, 0x30,
1166        0x3504, 0xffffffff, 0xa054e,
1167        0x3500, 0xffffffff, 0x3c,
1168        0x3504, 0xffffffff, 0x1055f,
1169        0x3500, 0xffffffff, 0x3f,
1170        0x3504, 0xffffffff, 0x10567,
1171        0x3500, 0xffffffff, 0x42,
1172        0x3504, 0xffffffff, 0x1056f,
1173        0x3500, 0xffffffff, 0x45,
1174        0x3504, 0xffffffff, 0x10572,
1175        0x3500, 0xffffffff, 0x48,
1176        0x3504, 0xffffffff, 0x20575,
1177        0x3500, 0xffffffff, 0x4c,
1178        0x3504, 0xffffffff, 0x190801,
1179        0x3500, 0xffffffff, 0x67,
1180        0x3504, 0xffffffff, 0x1082a,
1181        0x3500, 0xffffffff, 0x6a,
1182        0x3504, 0xffffffff, 0x1b082d,
1183        0x3500, 0xffffffff, 0x87,
1184        0x3504, 0xffffffff, 0x310851,
1185        0x3500, 0xffffffff, 0xba,
1186        0x3504, 0xffffffff, 0x891,
1187        0x3500, 0xffffffff, 0xbc,
1188        0x3504, 0xffffffff, 0x893,
1189        0x3500, 0xffffffff, 0xbe,
1190        0x3504, 0xffffffff, 0x20895,
1191        0x3500, 0xffffffff, 0xc2,
1192        0x3504, 0xffffffff, 0x20899,
1193        0x3500, 0xffffffff, 0xc6,
1194        0x3504, 0xffffffff, 0x2089d,
1195        0x3500, 0xffffffff, 0xca,
1196        0x3504, 0xffffffff, 0x8a1,
1197        0x3500, 0xffffffff, 0xcc,
1198        0x3504, 0xffffffff, 0x8a3,
1199        0x3500, 0xffffffff, 0xce,
1200        0x3504, 0xffffffff, 0x308a5,
1201        0x3500, 0xffffffff, 0xd3,
1202        0x3504, 0xffffffff, 0x6d08cd,
1203        0x3500, 0xffffffff, 0x142,
1204        0x3504, 0xffffffff, 0x2000095a,
1205        0x3504, 0xffffffff, 0x1,
1206        0x3500, 0xffffffff, 0x144,
1207        0x3504, 0xffffffff, 0x301f095b,
1208        0x3500, 0xffffffff, 0x165,
1209        0x3504, 0xffffffff, 0xc094d,
1210        0x3500, 0xffffffff, 0x173,
1211        0x3504, 0xffffffff, 0xf096d,
1212        0x3500, 0xffffffff, 0x184,
1213        0x3504, 0xffffffff, 0x15097f,
1214        0x3500, 0xffffffff, 0x19b,
1215        0x3504, 0xffffffff, 0xc0998,
1216        0x3500, 0xffffffff, 0x1a9,
1217        0x3504, 0xffffffff, 0x409a7,
1218        0x3500, 0xffffffff, 0x1af,
1219        0x3504, 0xffffffff, 0xcdc,
1220        0x3500, 0xffffffff, 0x1b1,
1221        0x3504, 0xffffffff, 0x800,
1222        0x3508, 0xffffffff, 0x6c9b2000,
1223        0x3510, 0xfc00, 0x2000,
1224        0x3544, 0xffffffff, 0xfc0,
1225        0x28d4, 0x00000100, 0x100
1226};
1227
1228static void si_init_golden_registers(struct radeon_device *rdev)
1229{
1230        switch (rdev->family) {
1231        case CHIP_TAHITI:
1232                radeon_program_register_sequence(rdev,
1233                                                 tahiti_golden_registers,
1234                                                 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1235                radeon_program_register_sequence(rdev,
1236                                                 tahiti_golden_rlc_registers,
1237                                                 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1238                radeon_program_register_sequence(rdev,
1239                                                 tahiti_mgcg_cgcg_init,
1240                                                 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1241                radeon_program_register_sequence(rdev,
1242                                                 tahiti_golden_registers2,
1243                                                 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1244                break;
1245        case CHIP_PITCAIRN:
1246                radeon_program_register_sequence(rdev,
1247                                                 pitcairn_golden_registers,
1248                                                 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1249                radeon_program_register_sequence(rdev,
1250                                                 pitcairn_golden_rlc_registers,
1251                                                 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1252                radeon_program_register_sequence(rdev,
1253                                                 pitcairn_mgcg_cgcg_init,
1254                                                 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1255                break;
1256        case CHIP_VERDE:
1257                radeon_program_register_sequence(rdev,
1258                                                 verde_golden_registers,
1259                                                 (const u32)ARRAY_SIZE(verde_golden_registers));
1260                radeon_program_register_sequence(rdev,
1261                                                 verde_golden_rlc_registers,
1262                                                 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1263                radeon_program_register_sequence(rdev,
1264                                                 verde_mgcg_cgcg_init,
1265                                                 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1266                radeon_program_register_sequence(rdev,
1267                                                 verde_pg_init,
1268                                                 (const u32)ARRAY_SIZE(verde_pg_init));
1269                break;
1270        case CHIP_OLAND:
1271                radeon_program_register_sequence(rdev,
1272                                                 oland_golden_registers,
1273                                                 (const u32)ARRAY_SIZE(oland_golden_registers));
1274                radeon_program_register_sequence(rdev,
1275                                                 oland_golden_rlc_registers,
1276                                                 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1277                radeon_program_register_sequence(rdev,
1278                                                 oland_mgcg_cgcg_init,
1279                                                 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1280                break;
1281        case CHIP_HAINAN:
1282                radeon_program_register_sequence(rdev,
1283                                                 hainan_golden_registers,
1284                                                 (const u32)ARRAY_SIZE(hainan_golden_registers));
1285                radeon_program_register_sequence(rdev,
1286                                                 hainan_golden_registers2,
1287                                                 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1288                radeon_program_register_sequence(rdev,
1289                                                 hainan_mgcg_cgcg_init,
1290                                                 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1291                break;
1292        default:
1293                break;
1294        }
1295}
1296
1297/**
1298 * si_get_allowed_info_register - fetch the register for the info ioctl
1299 *
1300 * @rdev: radeon_device pointer
1301 * @reg: register offset in bytes
1302 * @val: register value
1303 *
1304 * Returns 0 for success or -EINVAL for an invalid register
1305 *
1306 */
1307int si_get_allowed_info_register(struct radeon_device *rdev,
1308                                 u32 reg, u32 *val)
1309{
1310        switch (reg) {
1311        case GRBM_STATUS:
1312        case GRBM_STATUS2:
1313        case GRBM_STATUS_SE0:
1314        case GRBM_STATUS_SE1:
1315        case SRBM_STATUS:
1316        case SRBM_STATUS2:
1317        case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1318        case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1319        case UVD_STATUS:
1320                *val = RREG32(reg);
1321                return 0;
1322        default:
1323                return -EINVAL;
1324        }
1325}
1326
1327#define PCIE_BUS_CLK                10000
1328#define TCLK                        (PCIE_BUS_CLK / 10)
1329
1330/**
1331 * si_get_xclk - get the xclk
1332 *
1333 * @rdev: radeon_device pointer
1334 *
1335 * Returns the reference clock used by the gfx engine
1336 * (SI).
1337 */
1338u32 si_get_xclk(struct radeon_device *rdev)
1339{
1340        u32 reference_clock = rdev->clock.spll.reference_freq;
1341        u32 tmp;
1342
1343        tmp = RREG32(CG_CLKPIN_CNTL_2);
1344        if (tmp & MUX_TCLK_TO_XCLK)
1345                return TCLK;
1346
1347        tmp = RREG32(CG_CLKPIN_CNTL);
1348        if (tmp & XTALIN_DIVIDE)
1349                return reference_clock / 4;
1350
1351        return reference_clock;
1352}
1353
1354/* get temperature in millidegrees */
1355int si_get_temp(struct radeon_device *rdev)
1356{
1357        u32 temp;
1358        int actual_temp = 0;
1359
1360        temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1361                CTF_TEMP_SHIFT;
1362
1363        if (temp & 0x200)
1364                actual_temp = 255;
1365        else
1366                actual_temp = temp & 0x1ff;
1367
1368        actual_temp = (actual_temp * 1000);
1369
1370        return actual_temp;
1371}
1372
1373#define TAHITI_IO_MC_REGS_SIZE 36
1374
/*
 * Tahiti MC (memory controller) IO debug table: {index, data} pairs
 * written to MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() when
 * loading the legacy (non-new_fw) MC firmware.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1413
/*
 * Pitcairn MC IO debug table: {index, data} pairs for
 * MC_SEQ_IO_DEBUG_INDEX/DATA, used by si_mc_load_microcode().
 * Identical to the Tahiti table except for the final entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1452
/*
 * Verde MC IO debug table: {index, data} pairs for
 * MC_SEQ_IO_DEBUG_INDEX/DATA, used by si_mc_load_microcode().
 * Identical to the Tahiti table except for the final entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1491
/*
 * Oland MC IO debug table: {index, data} pairs for
 * MC_SEQ_IO_DEBUG_INDEX/DATA, used by si_mc_load_microcode().
 * Identical to the Tahiti table except for the final entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1530
/*
 * Hainan MC IO debug table: {index, data} pairs for
 * MC_SEQ_IO_DEBUG_INDEX/DATA, used by si_mc_load_microcode().
 * Identical to the Tahiti table except for the final entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1569
1570/* ucode loading */
/**
 * si_mc_load_microcode - load the MC ucode into the memory controller
 * @rdev: radeon device
 *
 * If the MC sequencer is not already running, resets it, programs the
 * MC IO debug (index, data) register pairs and the MC ucode - taken
 * either from the new unified firmware image or from the legacy
 * per-chip register tables and big-endian blob - then restarts the
 * sequencer and polls for the memory training done flags (D0 and D1).
 *
 * Returns 0 on success or if the engine was already running,
 * -EINVAL if no MC firmware has been loaded.
 */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		/* io_debug entries are (index, data) pairs of dwords, hence / (4 * 2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		/* legacy MC blob is stored big-endian */
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1668
1669static int si_init_microcode(struct radeon_device *rdev)
1670{
1671        const char *chip_name;
1672        const char *new_chip_name;
1673        size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1674        size_t smc_req_size, mc2_req_size;
1675        char fw_name[30];
1676        int err;
1677        int new_fw = 0;
1678        bool new_smc = false;
1679        bool si58_fw = false;
1680        bool banks2_fw = false;
1681
1682        DRM_DEBUG("\n");
1683
1684        switch (rdev->family) {
1685        case CHIP_TAHITI:
1686                chip_name = "TAHITI";
1687                new_chip_name = "tahiti";
1688                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1689                me_req_size = SI_PM4_UCODE_SIZE * 4;
1690                ce_req_size = SI_CE_UCODE_SIZE * 4;
1691                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1692                mc_req_size = SI_MC_UCODE_SIZE * 4;
1693                mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1694                smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1695                break;
1696        case CHIP_PITCAIRN:
1697                chip_name = "PITCAIRN";
1698                if ((rdev->pdev->revision == 0x81) &&
1699                    ((rdev->pdev->device == 0x6810) ||
1700                     (rdev->pdev->device == 0x6811)))
1701                        new_smc = true;
1702                new_chip_name = "pitcairn";
1703                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1704                me_req_size = SI_PM4_UCODE_SIZE * 4;
1705                ce_req_size = SI_CE_UCODE_SIZE * 4;
1706                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1707                mc_req_size = SI_MC_UCODE_SIZE * 4;
1708                mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1709                smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1710                break;
1711        case CHIP_VERDE:
1712                chip_name = "VERDE";
1713                if (((rdev->pdev->device == 0x6820) &&
1714                     ((rdev->pdev->revision == 0x81) ||
1715                      (rdev->pdev->revision == 0x83))) ||
1716                    ((rdev->pdev->device == 0x6821) &&
1717                     ((rdev->pdev->revision == 0x83) ||
1718                      (rdev->pdev->revision == 0x87))) ||
1719                    ((rdev->pdev->revision == 0x87) &&
1720                     ((rdev->pdev->device == 0x6823) ||
1721                      (rdev->pdev->device == 0x682b))))
1722                        new_smc = true;
1723                new_chip_name = "verde";
1724                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1725                me_req_size = SI_PM4_UCODE_SIZE * 4;
1726                ce_req_size = SI_CE_UCODE_SIZE * 4;
1727                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1728                mc_req_size = SI_MC_UCODE_SIZE * 4;
1729                mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1730                smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1731                break;
1732        case CHIP_OLAND:
1733                chip_name = "OLAND";
1734                if (((rdev->pdev->revision == 0x81) &&
1735                     ((rdev->pdev->device == 0x6600) ||
1736                      (rdev->pdev->device == 0x6604) ||
1737                      (rdev->pdev->device == 0x6605) ||
1738                      (rdev->pdev->device == 0x6610))) ||
1739                    ((rdev->pdev->revision == 0x83) &&
1740                     (rdev->pdev->device == 0x6610)))
1741                        new_smc = true;
1742                new_chip_name = "oland";
1743                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1744                me_req_size = SI_PM4_UCODE_SIZE * 4;
1745                ce_req_size = SI_CE_UCODE_SIZE * 4;
1746                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1747                mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1748                smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1749                break;
1750        case CHIP_HAINAN:
1751                chip_name = "HAINAN";
1752                if (((rdev->pdev->revision == 0x81) &&
1753                     (rdev->pdev->device == 0x6660)) ||
1754                    ((rdev->pdev->revision == 0x83) &&
1755                     ((rdev->pdev->device == 0x6660) ||
1756                      (rdev->pdev->device == 0x6663) ||
1757                      (rdev->pdev->device == 0x6665) ||
1758                      (rdev->pdev->device == 0x6667))))
1759                        new_smc = true;
1760                else if ((rdev->pdev->revision == 0xc3) &&
1761                         (rdev->pdev->device == 0x6665))
1762                        banks2_fw = true;
1763                new_chip_name = "hainan";
1764                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1765                me_req_size = SI_PM4_UCODE_SIZE * 4;
1766                ce_req_size = SI_CE_UCODE_SIZE * 4;
1767                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1768                mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1769                smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1770                break;
1771        default: BUG();
1772        }
1773
1774        /* this memory configuration requires special firmware */
1775        if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1776                si58_fw = true;
1777
1778        DRM_INFO("Loading %s Microcode\n", new_chip_name);
1779
1780        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1781        err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1782        if (err) {
1783                snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1784                err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1785                if (err)
1786                        goto out;
1787                if (rdev->pfp_fw->size != pfp_req_size) {
1788                        pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1789                               rdev->pfp_fw->size, fw_name);
1790                        err = -EINVAL;
1791                        goto out;
1792                }
1793        } else {
1794                err = radeon_ucode_validate(rdev->pfp_fw);
1795                if (err) {
1796                        pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797                               fw_name);
1798                        goto out;
1799                } else {
1800                        new_fw++;
1801                }
1802        }
1803
1804        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1805        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1806        if (err) {
1807                snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1808                err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1809                if (err)
1810                        goto out;
1811                if (rdev->me_fw->size != me_req_size) {
1812                        pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813                               rdev->me_fw->size, fw_name);
1814                        err = -EINVAL;
1815                }
1816        } else {
1817                err = radeon_ucode_validate(rdev->me_fw);
1818                if (err) {
1819                        pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820                               fw_name);
1821                        goto out;
1822                } else {
1823                        new_fw++;
1824                }
1825        }
1826
1827        snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1828        err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1829        if (err) {
1830                snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1831                err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1832                if (err)
1833                        goto out;
1834                if (rdev->ce_fw->size != ce_req_size) {
1835                        pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1836                               rdev->ce_fw->size, fw_name);
1837                        err = -EINVAL;
1838                }
1839        } else {
1840                err = radeon_ucode_validate(rdev->ce_fw);
1841                if (err) {
1842                        pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843                               fw_name);
1844                        goto out;
1845                } else {
1846                        new_fw++;
1847                }
1848        }
1849
1850        snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1851        err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1852        if (err) {
1853                snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1854                err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1855                if (err)
1856                        goto out;
1857                if (rdev->rlc_fw->size != rlc_req_size) {
1858                        pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1859                               rdev->rlc_fw->size, fw_name);
1860                        err = -EINVAL;
1861                }
1862        } else {
1863                err = radeon_ucode_validate(rdev->rlc_fw);
1864                if (err) {
1865                        pr_err("si_cp: validation failed for firmware \"%s\"\n",
1866                               fw_name);
1867                        goto out;
1868                } else {
1869                        new_fw++;
1870                }
1871        }
1872
1873        if (si58_fw)
1874                snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1875        else
1876                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1877        err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1878        if (err) {
1879                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1880                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1881                if (err) {
1882                        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1883                        err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1884                        if (err)
1885                                goto out;
1886                }
1887                if ((rdev->mc_fw->size != mc_req_size) &&
1888                    (rdev->mc_fw->size != mc2_req_size)) {
1889                        pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1890                               rdev->mc_fw->size, fw_name);
1891                        err = -EINVAL;
1892                }
1893                DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1894        } else {
1895                err = radeon_ucode_validate(rdev->mc_fw);
1896                if (err) {
1897                        pr_err("si_cp: validation failed for firmware \"%s\"\n",
1898                               fw_name);
1899                        goto out;
1900                } else {
1901                        new_fw++;
1902                }
1903        }
1904
1905        if (banks2_fw)
1906                snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1907        else if (new_smc)
1908                snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1909        else
1910                snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1911        err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1912        if (err) {
1913                snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1914                err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1915                if (err) {
1916                        pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1917                        release_firmware(rdev->smc_fw);
1918                        rdev->smc_fw = NULL;
1919                        err = 0;
1920                } else if (rdev->smc_fw->size != smc_req_size) {
1921                        pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1922                               rdev->smc_fw->size, fw_name);
1923                        err = -EINVAL;
1924                }
1925        } else {
1926                err = radeon_ucode_validate(rdev->smc_fw);
1927                if (err) {
1928                        pr_err("si_cp: validation failed for firmware \"%s\"\n",
1929                               fw_name);
1930                        goto out;
1931                } else {
1932                        new_fw++;
1933                }
1934        }
1935
1936        if (new_fw == 0) {
1937                rdev->new_fw = false;
1938        } else if (new_fw < 6) {
1939                pr_err("si_fw: mixing new and old firmware!\n");
1940                err = -EINVAL;
1941        } else {
1942                rdev->new_fw = true;
1943        }
1944out:
1945        if (err) {
1946                if (err != -EINVAL)
1947                        pr_err("si_cp: Failed to load firmware \"%s\"\n",
1948                               fw_name);
1949                release_firmware(rdev->pfp_fw);
1950                rdev->pfp_fw = NULL;
1951                release_firmware(rdev->me_fw);
1952                rdev->me_fw = NULL;
1953                release_firmware(rdev->ce_fw);
1954                rdev->ce_fw = NULL;
1955                release_firmware(rdev->rlc_fw);
1956                rdev->rlc_fw = NULL;
1957                release_firmware(rdev->mc_fw);
1958                rdev->mc_fw = NULL;
1959                release_firmware(rdev->smc_fw);
1960                rdev->smc_fw = NULL;
1961        }
1962        return err;
1963}
1964
1965/* watermark setup */
/**
 * dce6_line_buffer_adjust - program the line buffer split for a crtc
 * @rdev: radeon device
 * @radeon_crtc: the crtc to program
 * @mode: the mode set on this crtc (NULL/disabled means no allocation)
 * @other_mode: the mode on the paired crtc that shares the line buffer
 *
 * Selects a line buffer partitioning (half if the paired crtc is also
 * active, whole otherwise), programs it along with the pipe's DMIF
 * buffer allocation, and polls until the allocation has taken effect.
 *
 * Returns the line buffer allocation granted to this crtc (0 when the
 * crtc is disabled); the caller feeds this into the watermark setup.
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* wait (bounded by usec_timeout) for the hw to ack the allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
2024
2025static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2026{
2027        u32 tmp = RREG32(MC_SHARED_CHMAP);
2028
2029        switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2030        case 0:
2031        default:
2032                return 1;
2033        case 1:
2034                return 2;
2035        case 2:
2036                return 4;
2037        case 3:
2038                return 8;
2039        case 4:
2040                return 3;
2041        case 5:
2042                return 6;
2043        case 6:
2044                return 10;
2045        case 7:
2046                return 12;
2047        case 8:
2048                return 16;
2049        }
2050}
2051
/* Inputs to the DCE6 display watermark calculations (one set per head). */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
2067
2068static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2069{
2070        /* Calculate raw DRAM Bandwidth */
2071        fixed20_12 dram_efficiency; /* 0.7 */
2072        fixed20_12 yclk, dram_channels, bandwidth;
2073        fixed20_12 a;
2074
2075        a.full = dfixed_const(1000);
2076        yclk.full = dfixed_const(wm->yclk);
2077        yclk.full = dfixed_div(yclk, a);
2078        dram_channels.full = dfixed_const(wm->dram_channels * 4);
2079        a.full = dfixed_const(10);
2080        dram_efficiency.full = dfixed_const(7);
2081        dram_efficiency.full = dfixed_div(dram_efficiency, a);
2082        bandwidth.full = dfixed_mul(dram_channels, yclk);
2083        bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2084
2085        return dfixed_trunc(bandwidth);
2086}
2087
2088static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2089{
2090        /* Calculate DRAM Bandwidth and the part allocated to display. */
2091        fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2092        fixed20_12 yclk, dram_channels, bandwidth;
2093        fixed20_12 a;
2094
2095        a.full = dfixed_const(1000);
2096        yclk.full = dfixed_const(wm->yclk);
2097        yclk.full = dfixed_div(yclk, a);
2098        dram_channels.full = dfixed_const(wm->dram_channels * 4);
2099        a.full = dfixed_const(10);
2100        disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2101        disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2102        bandwidth.full = dfixed_mul(dram_channels, yclk);
2103        bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2104
2105        return dfixed_trunc(bandwidth);
2106}
2107
2108static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2109{
2110        /* Calculate the display Data return Bandwidth */
2111        fixed20_12 return_efficiency; /* 0.8 */
2112        fixed20_12 sclk, bandwidth;
2113        fixed20_12 a;
2114
2115        a.full = dfixed_const(1000);
2116        sclk.full = dfixed_const(wm->sclk);
2117        sclk.full = dfixed_div(sclk, a);
2118        a.full = dfixed_const(10);
2119        return_efficiency.full = dfixed_const(8);
2120        return_efficiency.full = dfixed_div(return_efficiency, a);
2121        a.full = dfixed_const(32);
2122        bandwidth.full = dfixed_mul(a, sclk);
2123        bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2124
2125        return dfixed_trunc(bandwidth);
2126}
2127
/* DMIF request size in bytes (fixed at 32 for DCE6 in this driver). */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
2132
2133static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2134{
2135        /* Calculate the DMIF Request Bandwidth */
2136        fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2137        fixed20_12 disp_clk, sclk, bandwidth;
2138        fixed20_12 a, b1, b2;
2139        u32 min_bandwidth;
2140
2141        a.full = dfixed_const(1000);
2142        disp_clk.full = dfixed_const(wm->disp_clk);
2143        disp_clk.full = dfixed_div(disp_clk, a);
2144        a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2145        b1.full = dfixed_mul(a, disp_clk);
2146
2147        a.full = dfixed_const(1000);
2148        sclk.full = dfixed_const(wm->sclk);
2149        sclk.full = dfixed_div(sclk, a);
2150        a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2151        b2.full = dfixed_mul(a, sclk);
2152
2153        a.full = dfixed_const(10);
2154        disp_clk_request_efficiency.full = dfixed_const(8);
2155        disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2156
2157        min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2158
2159        a.full = dfixed_const(min_bandwidth);
2160        bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2161
2162        return dfixed_trunc(bandwidth);
2163}
2164
2165static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2166{
2167        /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2168        u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2169        u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2170        u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2171
2172        return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2173}
2174
2175static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2176{
2177        /* Calculate the display mode Average Bandwidth
2178         * DisplayMode should contain the source and destination dimensions,
2179         * timing, etc.
2180         */
2181        fixed20_12 bpp;
2182        fixed20_12 line_time;
2183        fixed20_12 src_width;
2184        fixed20_12 bandwidth;
2185        fixed20_12 a;
2186
2187        a.full = dfixed_const(1000);
2188        line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2189        line_time.full = dfixed_div(line_time, a);
2190        bpp.full = dfixed_const(wm->bytes_per_pixel);
2191        src_width.full = dfixed_const(wm->src_width);
2192        bandwidth.full = dfixed_mul(src_width, bpp);
2193        bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2194        bandwidth.full = dfixed_div(bandwidth, line_time);
2195
2196        return dfixed_trunc(bandwidth);
2197}
2198
/*
 * Compute the latency watermark for one head: the worst-case time (ns)
 * this head may have to wait for its data, plus any shortfall in line
 * buffer fill time versus the active display period.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* return times for a worst-case chunk and a cursor line pair at the
	 * available bandwidth (constants per hw chunk sizes)
	 */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* the other active heads' requests may be serviced ahead of ours */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads, nothing to wait for */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many scaler taps or interlacing require more
	 * source lines per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line buffer fill bandwidth: our per-head share of the available
	 * bandwidth, capped by what the DMIF buffer can absorb and by the
	 * display pipe's own consumption rate
	 */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time to fill the worst-case number of source lines at that rate */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line buffer fills within the active period, latency alone
	 * dominates; otherwise add the shortfall
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2248
2249static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2250{
2251        if (dce6_average_bandwidth(wm) <=
2252            (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2253                return true;
2254        else
2255                return false;
2256};
2257
2258static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2259{
2260        if (dce6_average_bandwidth(wm) <=
2261            (dce6_available_bandwidth(wm) / wm->num_heads))
2262                return true;
2263        else
2264                return false;
2265};
2266
2267static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2268{
2269        u32 lb_partitions = wm->lb_size / wm->src_width;
2270        u32 line_time = wm->active_time + wm->blank_time;
2271        u32 latency_tolerant_lines;
2272        u32 latency_hiding;
2273        fixed20_12 a;
2274
2275        a.full = dfixed_const(1);
2276        if (wm->vsc.full > a.full)
2277                latency_tolerant_lines = 1;
2278        else {
2279                if (lb_partitions <= (wm->vtaps + 1))
2280                        latency_tolerant_lines = 1;
2281                else
2282                        latency_tolerant_lines = 2;
2283        }
2284
2285        latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2286
2287        if (dce6_latency_watermark(wm) <= latency_hiding)
2288                return true;
2289        else
2290                return false;
2291}
2292
/**
 * dce6_program_watermarks - calculate and program display watermarks (DCE6)
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the display controller to program
 * @lb_size: line buffer allocation for this controller
 * @num_heads: number of active display heads
 *
 * Computes latency watermarks and priority marks for the given crtc from
 * the current mode, the memory/engine clocks and the line buffer split,
 * then programs them into the DPG arbitration/latency registers and the
 * PRIORITY_A/B_CNT registers.  Also caches line_time and the watermarks
 * on the crtc for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	/* default to PRIORITY_OFF; overwritten below if the crtc is active */
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* active and total scanline durations derived from the pixel
		 * clock (mode->clock); line_time is clamped to the 16-bit
		 * field it is later written into */
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		line_time = min(line_time, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* ARUBA uses the evergreen-style DRAM channel query */
		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			/* dpm clocks are reported in units that need the *10
			 * scaling applied here (matches the low-clock path) */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark scaled by pixel clock and
		 * horizontal scale ratio, in 16-pixel units (fixed point) */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with the low-clock watermark */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2458
2459void dce6_bandwidth_update(struct radeon_device *rdev)
2460{
2461        struct drm_display_mode *mode0 = NULL;
2462        struct drm_display_mode *mode1 = NULL;
2463        u32 num_heads = 0, lb_size;
2464        int i;
2465
2466        if (!rdev->mode_info.mode_config_initialized)
2467                return;
2468
2469        radeon_update_display_priority(rdev);
2470
2471        for (i = 0; i < rdev->num_crtc; i++) {
2472                if (rdev->mode_info.crtcs[i]->base.enabled)
2473                        num_heads++;
2474        }
2475        for (i = 0; i < rdev->num_crtc; i += 2) {
2476                mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2477                mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2478                lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2479                dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2480                lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2481                dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2482        }
2483}
2484
2485/*
2486 * Core functions
2487 */
2488static void si_tiling_mode_table_init(struct radeon_device *rdev)
2489{
2490        u32 *tile = rdev->config.si.tile_mode_array;
2491        const u32 num_tile_mode_states =
2492                        ARRAY_SIZE(rdev->config.si.tile_mode_array);
2493        u32 reg_offset, split_equal_to_row_size;
2494
2495        switch (rdev->config.si.mem_row_size_in_kb) {
2496        case 1:
2497                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2498                break;
2499        case 2:
2500        default:
2501                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2502                break;
2503        case 4:
2504                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2505                break;
2506        }
2507
2508        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2509                tile[reg_offset] = 0;
2510
2511        switch(rdev->family) {
2512        case CHIP_TAHITI:
2513        case CHIP_PITCAIRN:
2514                /* non-AA compressed depth or any compressed stencil */
2515                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2519                           NUM_BANKS(ADDR_SURF_16_BANK) |
2520                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523                /* 2xAA/4xAA compressed depth only */
2524                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2527                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2528                           NUM_BANKS(ADDR_SURF_16_BANK) |
2529                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532                /* 8xAA compressed depth only */
2533                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537                           NUM_BANKS(ADDR_SURF_16_BANK) |
2538                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541                /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2542                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2546                           NUM_BANKS(ADDR_SURF_16_BANK) |
2547                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550                /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2551                tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2552                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2554                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2555                           NUM_BANKS(ADDR_SURF_16_BANK) |
2556                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559                /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2560                tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2562                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2563                           TILE_SPLIT(split_equal_to_row_size) |
2564                           NUM_BANKS(ADDR_SURF_16_BANK) |
2565                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2567                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2568                /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2569                tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2571                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572                           TILE_SPLIT(split_equal_to_row_size) |
2573                           NUM_BANKS(ADDR_SURF_16_BANK) |
2574                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2577                /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2578                tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2580                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2581                           TILE_SPLIT(split_equal_to_row_size) |
2582                           NUM_BANKS(ADDR_SURF_16_BANK) |
2583                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2585                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2586                /* 1D and 1D Array Surfaces */
2587                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2588                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591                           NUM_BANKS(ADDR_SURF_16_BANK) |
2592                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2595                /* Displayable maps. */
2596                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2598                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2600                           NUM_BANKS(ADDR_SURF_16_BANK) |
2601                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2604                /* Display 8bpp. */
2605                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609                           NUM_BANKS(ADDR_SURF_16_BANK) |
2610                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613                /* Display 16bpp. */
2614                tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2617                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618                           NUM_BANKS(ADDR_SURF_16_BANK) |
2619                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622                /* Display 32bpp. */
2623                tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627                           NUM_BANKS(ADDR_SURF_16_BANK) |
2628                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2631                /* Thin. */
2632                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2636                           NUM_BANKS(ADDR_SURF_16_BANK) |
2637                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640                /* Thin 8 bpp. */
2641                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2644                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645                           NUM_BANKS(ADDR_SURF_16_BANK) |
2646                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2649                /* Thin 16 bpp. */
2650                tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2653                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2654                           NUM_BANKS(ADDR_SURF_16_BANK) |
2655                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2658                /* Thin 32 bpp. */
2659                tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2661                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2663                           NUM_BANKS(ADDR_SURF_16_BANK) |
2664                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2667                /* Thin 64 bpp. */
2668                tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671                           TILE_SPLIT(split_equal_to_row_size) |
2672                           NUM_BANKS(ADDR_SURF_16_BANK) |
2673                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2675                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2676                /* 8 bpp PRT. */
2677                tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2679                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2680                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681                           NUM_BANKS(ADDR_SURF_16_BANK) |
2682                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2683                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2684                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2685                /* 16 bpp PRT */
2686                tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2688                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2689                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2690                           NUM_BANKS(ADDR_SURF_16_BANK) |
2691                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2694                /* 32 bpp PRT */
2695                tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699                           NUM_BANKS(ADDR_SURF_16_BANK) |
2700                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2702                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703                /* 64 bpp PRT */
2704                tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2708                           NUM_BANKS(ADDR_SURF_16_BANK) |
2709                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2712                /* 128 bpp PRT */
2713                tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2715                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2716                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2717                           NUM_BANKS(ADDR_SURF_8_BANK) |
2718                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2720                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2721
2722                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2723                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2724                break;
2725
2726        case CHIP_VERDE:
2727        case CHIP_OLAND:
2728        case CHIP_HAINAN:
2729                /* non-AA compressed depth or any compressed stencil */
2730                tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2732                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2734                           NUM_BANKS(ADDR_SURF_16_BANK) |
2735                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2738                /* 2xAA/4xAA compressed depth only */
2739                tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2741                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2743                           NUM_BANKS(ADDR_SURF_16_BANK) |
2744                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2747                /* 8xAA compressed depth only */
2748                tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2750                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2752                           NUM_BANKS(ADDR_SURF_16_BANK) |
2753                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2755                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2756                /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2757                tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2761                           NUM_BANKS(ADDR_SURF_16_BANK) |
2762                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2765                /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2766                tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2768                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2770                           NUM_BANKS(ADDR_SURF_16_BANK) |
2771                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2774                /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2775                tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                           TILE_SPLIT(split_equal_to_row_size) |
2779                           NUM_BANKS(ADDR_SURF_16_BANK) |
2780                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2783                /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2784                tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                           TILE_SPLIT(split_equal_to_row_size) |
2788                           NUM_BANKS(ADDR_SURF_16_BANK) |
2789                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2791                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2792                /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2793                tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794                           MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2795                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796                           TILE_SPLIT(split_equal_to_row_size) |
2797                           NUM_BANKS(ADDR_SURF_16_BANK) |
2798                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2801                /* 1D and 1D Array Surfaces */
2802                tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2803                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2804                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2806                           NUM_BANKS(ADDR_SURF_16_BANK) |
2807                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2810                /* Displayable maps. */
2811                tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2815                           NUM_BANKS(ADDR_SURF_16_BANK) |
2816                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2818                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2819                /* Display 8bpp. */
2820                tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2824                           NUM_BANKS(ADDR_SURF_16_BANK) |
2825                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2827                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2828                /* Display 16bpp. */
2829                tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2832                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2833                           NUM_BANKS(ADDR_SURF_16_BANK) |
2834                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2837                /* Display 32bpp. */
2838                tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                           MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2841                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2842                           NUM_BANKS(ADDR_SURF_16_BANK) |
2843                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2845                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2846                /* Thin. */
2847                tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2851                           NUM_BANKS(ADDR_SURF_16_BANK) |
2852                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2854                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855                /* Thin 8 bpp. */
2856                tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2858                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2859                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2860                           NUM_BANKS(ADDR_SURF_16_BANK) |
2861                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2864                /* Thin 16 bpp. */
2865                tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2868                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869                           NUM_BANKS(ADDR_SURF_16_BANK) |
2870                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2872                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2873                /* Thin 32 bpp. */
2874                tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2877                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2878                           NUM_BANKS(ADDR_SURF_16_BANK) |
2879                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882                /* Thin 64 bpp. */
2883                tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2885                           PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886                           TILE_SPLIT(split_equal_to_row_size) |
2887                           NUM_BANKS(ADDR_SURF_16_BANK) |
2888                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2891                /* 8 bpp PRT. */
2892                tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2894                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2895                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2896                           NUM_BANKS(ADDR_SURF_16_BANK) |
2897                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2898                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2899                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2900                /* 16 bpp PRT */
2901                tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2903                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2904                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2905                           NUM_BANKS(ADDR_SURF_16_BANK) |
2906                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2909                /* 32 bpp PRT */
2910                tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2912                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2913                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2914                           NUM_BANKS(ADDR_SURF_16_BANK) |
2915                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2918                /* 64 bpp PRT */
2919                tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2921                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2922                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2923                           NUM_BANKS(ADDR_SURF_16_BANK) |
2924                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2927                /* 128 bpp PRT */
2928                tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929                           MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2930                           PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2931                           TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2932                           NUM_BANKS(ADDR_SURF_8_BANK) |
2933                           BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                           BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935                           MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2936
2937                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2938                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2939                break;
2940
2941        default:
2942                DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2943        }
2944}
2945
2946static void si_select_se_sh(struct radeon_device *rdev,
2947                            u32 se_num, u32 sh_num)
2948{
2949        u32 data = INSTANCE_BROADCAST_WRITES;
2950
2951        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2952                data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2953        else if (se_num == 0xffffffff)
2954                data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2955        else if (sh_num == 0xffffffff)
2956                data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2957        else
2958                data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2959        WREG32(GRBM_GFX_INDEX, data);
2960}
2961
2962static u32 si_create_bitmask(u32 bit_width)
2963{
2964        u32 i, mask = 0;
2965
2966        for (i = 0; i < bit_width; i++) {
2967                mask <<= 1;
2968                mask |= 1;
2969        }
2970        return mask;
2971}
2972
2973static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2974{
2975        u32 data, mask;
2976
2977        data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2978        if (data & 1)
2979                data &= INACTIVE_CUS_MASK;
2980        else
2981                data = 0;
2982        data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2983
2984        data >>= INACTIVE_CUS_SHIFT;
2985
2986        mask = si_create_bitmask(cu_per_sh);
2987
2988        return ~data & mask;
2989}
2990
/* For every shader array, scan the active-CU mask and clear the first
 * matching bit in SPI_STATIC_THREAD_MGMT_3 (reserving that CU from the
 * static thread management mask); broadcast addressing is restored on
 * exit.
 *
 * NOTE(review): "mask <<= k" does not test bit k each iteration -- it
 * walks bits 0, 1, 3, 6, 10, 15, 21, 28 and then shifts out of range.
 * "mask = 1 << k" may have been the intent; confirm against AMD
 * reference code before changing, since this alters which CU bit gets
 * cleared in hardware.
 */
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			mask = 1;
			for (k = 0; k < 16; k++) {
				mask <<= k;
				if (active_cu & mask) {
					/* clear only the first active CU bit found */
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3017
3018static u32 si_get_rb_disabled(struct radeon_device *rdev,
3019                              u32 max_rb_num_per_se,
3020                              u32 sh_per_se)
3021{
3022        u32 data, mask;
3023
3024        data = RREG32(CC_RB_BACKEND_DISABLE);
3025        if (data & 1)
3026                data &= BACKEND_DISABLE_MASK;
3027        else
3028                data = 0;
3029        data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3030
3031        data >>= BACKEND_DISABLE_SHIFT;
3032
3033        mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3034
3035        return data & mask;
3036}
3037
/* Collect the disabled-render-backend fuse bits of every shader array,
 * derive the global enabled-RB mask (cached in
 * rdev->config.si.backend_enable_mask), and program a
 * PA_SC_RASTER_CONFIG mapping for each shader engine accordingly.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the per-SH disabled bits into one packed word */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert into a mask of usable RBs */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* exported so userspace can query the enabled backends */
	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* choose a raster config per SE from the 2 RB-enable bits of each
	 * of its shader arrays; the case->RB_MAP pairing below is
	 * deliberate, do not "sort" it
	 */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3087
/* One-time graphics engine bring-up: record the per-ASIC shader/backend
 * limits in rdev->config.si, program the address/tiling configuration
 * registers, set up the render backends and SPI, count the active CUs
 * and apply the remaining 3D-engine hardware defaults.  The register
 * write order below follows the hardware init sequence -- keep it.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family topology limits and golden address config */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* VERDE limits also cover unknown SI parts (default) */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* value discarded; NOTE(review): read presumably kept for its
	 * side effect -- confirm before removing */
	RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB, capped at 4) from the column count */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* total active CUs across all SE/SH pairs */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no bits changed (write-back as-is) */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable the CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no bits changed (write-back as-is) */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}
3353
3354/*
3355 * GPU scratch registers helpers function.
3356 */
3357static void si_scratch_init(struct radeon_device *rdev)
3358{
3359        int i;
3360
3361        rdev->scratch.num_reg = 7;
3362        rdev->scratch.reg_base = SCRATCH_REG0;
3363        for (i = 0; i < rdev->scratch.num_reg; i++) {
3364                rdev->scratch.free[i] = true;
3365                rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3366        }
3367}
3368
/* Emit a fence on the requested ring: flush the read caches over GART,
 * then write the fence sequence number to the fence driver's GPU
 * address with an EOP event that also raises an interrupt.  The exact
 * dword order is the PM4 packet layout -- do not reorder.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* high address bits plus data/interrupt select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3395
3396/*
3397 * IB stuff
3398 */
/* Schedule an indirect buffer on the ring.  Const IBs are preceded by a
 * SWITCH_BUFFER and use the INDIRECT_BUFFER_CONST packet; normal IBs
 * first record the post-IB read pointer (via a config register or the
 * writeback slot) and are followed by a per-VMID read-cache flush.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* vm_id 0 = no VM bound to this IB */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* +3 for this packet, +4 for the IB packet, +8 for
			 * the cache-flush tail emitted below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* same bookkeeping through the writeback slot
			 * (this packet is 5 dwords instead of 3) */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3455
3456/*
3457 * CP.
3458 */
3459static void si_cp_enable(struct radeon_device *rdev, bool enable)
3460{
3461        if (enable)
3462                WREG32(CP_ME_CNTL, 0);
3463        else {
3464                if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3465                        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3466                WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3467                WREG32(SCRATCH_UMSK, 0);
3468                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3469                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3470                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3471        }
3472        udelay(50);
3473}
3474
3475static int si_cp_load_microcode(struct radeon_device *rdev)
3476{
3477        int i;
3478
3479        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3480                return -EINVAL;
3481
3482        si_cp_enable(rdev, false);
3483
3484        if (rdev->new_fw) {
3485                const struct gfx_firmware_header_v1_0 *pfp_hdr =
3486                        (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3487                const struct gfx_firmware_header_v1_0 *ce_hdr =
3488                        (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3489                const struct gfx_firmware_header_v1_0 *me_hdr =
3490                        (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3491                const __le32 *fw_data;
3492                u32 fw_size;
3493
3494                radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3495                radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3496                radeon_ucode_print_gfx_hdr(&me_hdr->header);
3497
3498                /* PFP */
3499                fw_data = (const __le32 *)
3500                        (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3501                fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3502                WREG32(CP_PFP_UCODE_ADDR, 0);
3503                for (i = 0; i < fw_size; i++)
3504                        WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3505                WREG32(CP_PFP_UCODE_ADDR, 0);
3506
3507                /* CE */
3508                fw_data = (const __le32 *)
3509                        (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3510                fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3511                WREG32(CP_CE_UCODE_ADDR, 0);
3512                for (i = 0; i < fw_size; i++)
3513                        WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3514                WREG32(CP_CE_UCODE_ADDR, 0);
3515
3516                /* ME */
3517                fw_data = (const __be32 *)
3518                        (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3519                fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3520                WREG32(CP_ME_RAM_WADDR, 0);
3521                for (i = 0; i < fw_size; i++)
3522                        WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3523                WREG32(CP_ME_RAM_WADDR, 0);
3524        } else {
3525                const __be32 *fw_data;
3526
3527                /* PFP */
3528                fw_data = (const __be32 *)rdev->pfp_fw->data;
3529                WREG32(CP_PFP_UCODE_ADDR, 0);
3530                for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3531                        WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3532                WREG32(CP_PFP_UCODE_ADDR, 0);
3533
3534                /* CE */
3535                fw_data = (const __be32 *)rdev->ce_fw->data;
3536                WREG32(CP_CE_UCODE_ADDR, 0);
3537                for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3538                        WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3539                WREG32(CP_CE_UCODE_ADDR, 0);
3540
3541                /* ME */
3542                fw_data = (const __be32 *)rdev->me_fw->data;
3543                WREG32(CP_ME_RAM_WADDR, 0);
3544                for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3545                        WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3546                WREG32(CP_ME_RAM_WADDR, 0);
3547        }
3548
3549        WREG32(CP_PFP_UCODE_ADDR, 0);
3550        WREG32(CP_CE_UCODE_ADDR, 0);
3551        WREG32(CP_ME_RAM_WADDR, 0);
3552        WREG32(CP_ME_RAM_RADDR, 0);
3553        return 0;
3554}
3555
3556static int si_cp_start(struct radeon_device *rdev)
3557{
3558        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3559        int r, i;
3560
3561        r = radeon_ring_lock(rdev, ring, 7 + 4);
3562        if (r) {
3563                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3564                return r;
3565        }
3566        /* init the CP */
3567        radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3568        radeon_ring_write(ring, 0x1);
3569        radeon_ring_write(ring, 0x0);
3570        radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3571        radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3572        radeon_ring_write(ring, 0);
3573        radeon_ring_write(ring, 0);
3574
3575        /* init the CE partitions */
3576        radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3577        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3578        radeon_ring_write(ring, 0xc000);
3579        radeon_ring_write(ring, 0xe000);
3580        radeon_ring_unlock_commit(rdev, ring, false);
3581
3582        si_cp_enable(rdev, true);
3583
3584        r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3585        if (r) {
3586                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3587                return r;
3588        }
3589
3590        /* setup clear context state */
3591        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3592        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3593
3594        for (i = 0; i < si_default_size; i++)
3595                radeon_ring_write(ring, si_default_state[i]);
3596
3597        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3598        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3599
3600        /* set clear context state */
3601        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3602        radeon_ring_write(ring, 0);
3603
3604        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3605        radeon_ring_write(ring, 0x00000316);
3606        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3607        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3608
3609        radeon_ring_unlock_commit(rdev, ring, false);
3610
3611        for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3612                ring = &rdev->ring[i];
3613                r = radeon_ring_lock(rdev, ring, 2);
3614
3615                /* clear the compute context state */
3616                radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3617                radeon_ring_write(ring, 0);
3618
3619                radeon_ring_unlock_commit(rdev, ring, false);
3620        }
3621
3622        return 0;
3623}
3624
3625static void si_cp_fini(struct radeon_device *rdev)
3626{
3627        struct radeon_ring *ring;
3628        si_cp_enable(rdev, false);
3629
3630        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3631        radeon_ring_fini(rdev, ring);
3632        radeon_scratch_free(rdev, ring->rptr_save_reg);
3633
3634        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3635        radeon_ring_fini(rdev, ring);
3636        radeon_scratch_free(rdev, ring->rptr_save_reg);
3637
3638        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3639        radeon_ring_fini(rdev, ring);
3640        radeon_scratch_free(rdev, ring->rptr_save_reg);
3641}
3642
/**
 * si_cp_resume - program and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the ring-buffer size, read/write pointers, writeback rptr
 * address and base address for the gfx ring (RB0) and the two compute
 * rings (RB1/RB2), then starts the CP via si_cp_start() and ring-tests
 * all three rings.  The register write order within each ring block
 * follows the hardware programming sequence and must not be changed.
 * Returns 0 on success, negative error code if the gfx ring test fails
 * (compute ring test failures only mark that ring not ready).
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: CP must not push rptr updates to memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: the compute rings depend on
		 * the same CP, so mark everything not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3768
3769u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3770{
3771        u32 reset_mask = 0;
3772        u32 tmp;
3773
3774        /* GRBM_STATUS */
3775        tmp = RREG32(GRBM_STATUS);
3776        if (tmp & (PA_BUSY | SC_BUSY |
3777                   BCI_BUSY | SX_BUSY |
3778                   TA_BUSY | VGT_BUSY |
3779                   DB_BUSY | CB_BUSY |
3780                   GDS_BUSY | SPI_BUSY |
3781                   IA_BUSY | IA_BUSY_NO_DMA))
3782                reset_mask |= RADEON_RESET_GFX;
3783
3784        if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3785                   CP_BUSY | CP_COHERENCY_BUSY))
3786                reset_mask |= RADEON_RESET_CP;
3787
3788        if (tmp & GRBM_EE_BUSY)
3789                reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3790
3791        /* GRBM_STATUS2 */
3792        tmp = RREG32(GRBM_STATUS2);
3793        if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3794                reset_mask |= RADEON_RESET_RLC;
3795
3796        /* DMA_STATUS_REG 0 */
3797        tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3798        if (!(tmp & DMA_IDLE))
3799                reset_mask |= RADEON_RESET_DMA;
3800
3801        /* DMA_STATUS_REG 1 */
3802        tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3803        if (!(tmp & DMA_IDLE))
3804                reset_mask |= RADEON_RESET_DMA1;
3805
3806        /* SRBM_STATUS2 */
3807        tmp = RREG32(SRBM_STATUS2);
3808        if (tmp & DMA_BUSY)
3809                reset_mask |= RADEON_RESET_DMA;
3810
3811        if (tmp & DMA1_BUSY)
3812                reset_mask |= RADEON_RESET_DMA1;
3813
3814        /* SRBM_STATUS */
3815        tmp = RREG32(SRBM_STATUS);
3816
3817        if (tmp & IH_BUSY)
3818                reset_mask |= RADEON_RESET_IH;
3819
3820        if (tmp & SEM_BUSY)
3821                reset_mask |= RADEON_RESET_SEM;
3822
3823        if (tmp & GRBM_RQ_PENDING)
3824                reset_mask |= RADEON_RESET_GRBM;
3825
3826        if (tmp & VMC_BUSY)
3827                reset_mask |= RADEON_RESET_VMC;
3828
3829        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3830                   MCC_BUSY | MCD_BUSY))
3831                reset_mask |= RADEON_RESET_MC;
3832
3833        if (evergreen_is_display_hung(rdev))
3834                reset_mask |= RADEON_RESET_DISPLAY;
3835
3836        /* VM_L2_STATUS */
3837        tmp = RREG32(VM_L2_STATUS);
3838        if (tmp & L2_BUSY)
3839                reset_mask |= RADEON_RESET_VMC;
3840
3841        /* Skip MC reset as it's mostly likely not hung, just busy */
3842        if (reset_mask & RADEON_RESET_MC) {
3843                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3844                reset_mask &= ~RADEON_RESET_MC;
3845        }
3846
3847        return reset_mask;
3848}
3849
/**
 * si_gpu_soft_reset - soft-reset the GPU blocks named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* flags from si_gpu_check_soft_reset()
 *
 * Quiesces the GPU (PG/CG off, RLC stopped, CP halted, DMA ring
 * buffers disabled, MC stopped), translates the reset mask into
 * GRBM/SRBM soft-reset bits, pulses those bits, then resumes the MC.
 * The ordering of the quiesce steps and the set/clear/read-back
 * pattern on the soft-reset registers follow the hardware sequence
 * and must not be reordered.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the reset mask onto GRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	/* map the reset mask onto SRBM soft-reset bits */
	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, read back, wait, clear, read back */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse pattern for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3981
3982static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3983{
3984        u32 tmp, i;
3985
3986        tmp = RREG32(CG_SPLL_FUNC_CNTL);
3987        tmp |= SPLL_BYPASS_EN;
3988        WREG32(CG_SPLL_FUNC_CNTL, tmp);
3989
3990        tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3991        tmp |= SPLL_CTLREQ_CHG;
3992        WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3993
3994        for (i = 0; i < rdev->usec_timeout; i++) {
3995                if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3996                        break;
3997                udelay(1);
3998        }
3999
4000        tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4001        tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4002        WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4003
4004        tmp = RREG32(MPLL_CNTL_MODE);
4005        tmp &= ~MPLL_MCLK_SEL;
4006        WREG32(MPLL_CNTL_MODE, tmp);
4007}
4008
4009static void si_spll_powerdown(struct radeon_device *rdev)
4010{
4011        u32 tmp;
4012
4013        tmp = RREG32(SPLL_CNTL_MODE);
4014        tmp |= SPLL_SW_DIR_CONTROL;
4015        WREG32(SPLL_CNTL_MODE, tmp);
4016
4017        tmp = RREG32(CG_SPLL_FUNC_CNTL);
4018        tmp |= SPLL_RESET;
4019        WREG32(CG_SPLL_FUNC_CNTL, tmp);
4020
4021        tmp = RREG32(CG_SPLL_FUNC_CNTL);
4022        tmp |= SPLL_SLEEP;
4023        WREG32(CG_SPLL_FUNC_CNTL, tmp);
4024
4025        tmp = RREG32(SPLL_CNTL_MODE);
4026        tmp &= ~SPLL_SW_DIR_CONTROL;
4027        WREG32(SPLL_CNTL_MODE, tmp);
4028}
4029
/**
 * si_gpu_pci_config_reset - full ASIC reset via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavyweight reset used when soft reset is insufficient: quiesce the
 * GPU (CG/PG off, CP halted, both DMA rings disabled, RLC stopped,
 * MC stopped), force clocks into bypass and power down the SPLL, clear
 * PCI bus mastering, then trigger the PCI config reset and poll
 * CONFIG_MEMSIZE until the ASIC responds again.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	/* CONFIG_MEMSIZE reads 0xffffffff while the ASIC is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4081
/**
 * si_asic_reset - reset the ASIC
 *
 * @rdev: radeon_device pointer
 * @hard: true to skip soft reset and go straight to PCI config reset
 *
 * Tries a soft reset of the engines flagged hung; if blocks are still
 * hung afterwards (and the radeon_hard_reset module option allows it),
 * escalates to a PCI config reset.  The BIOS scratch "engine hung"
 * flag is set while a reset is pending and cleared once the GPU checks
 * out idle.  Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	if (hard) {
		si_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4112
4113/**
4114 * si_gfx_is_lockup - Check if the GFX engine is locked up
4115 *
4116 * @rdev: radeon_device pointer
4117 * @ring: radeon_ring structure holding ring information
4118 *
4119 * Check if the GFX engine is locked up.
4120 * Returns true if the engine appears to be locked up, false if not.
4121 */
4122bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4123{
4124        u32 reset_mask = si_gpu_check_soft_reset(rdev);
4125
4126        if (!(reset_mask & (RADEON_RESET_GFX |
4127                            RADEON_RESET_COMPUTE |
4128                            RADEON_RESET_CP))) {
4129                radeon_ring_lockup_update(rdev, ring);
4130                return false;
4131        }
4132        return radeon_ring_test_lockup(rdev, ring);
4133}
4134
4135/* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops the MC, then programs the
 * system/VRAM aperture ranges, the FB location and the HDP nonsurface
 * window before resuming the MC.  AGP apertures are disabled (SI has
 * no AGP).  The MC must be idle while these registers change, hence
 * the stop/wait/resume bracket.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end, bottom 16 bits = start (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4186
/**
 * si_vram_gtt_location - pick VRAM and GTT placement in the MC space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure to place
 *
 * Caps reported VRAM so at least ~1GB of MC address space remains for
 * the GTT, then lets the core helpers choose the VRAM and GTT ranges.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	/* NOTE(review): uses &rdev->mc here but the @mc parameter below —
	 * the only caller passes &rdev->mc so these alias; confirm before
	 * calling with any other mc */
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
4200
4201static int si_mc_init(struct radeon_device *rdev)
4202{
4203        u32 tmp;
4204        int chansize, numchan;
4205
4206        /* Get VRAM informations */
4207        rdev->mc.vram_is_ddr = true;
4208        tmp = RREG32(MC_ARB_RAMCFG);
4209        if (tmp & CHANSIZE_OVERRIDE) {
4210                chansize = 16;
4211        } else if (tmp & CHANSIZE_MASK) {
4212                chansize = 64;
4213        } else {
4214                chansize = 32;
4215        }
4216        tmp = RREG32(MC_SHARED_CHMAP);
4217        switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4218        case 0:
4219        default:
4220                numchan = 1;
4221                break;
4222        case 1:
4223                numchan = 2;
4224                break;
4225        case 2:
4226                numchan = 4;
4227                break;
4228        case 3:
4229                numchan = 8;
4230                break;
4231        case 4:
4232                numchan = 3;
4233                break;
4234        case 5:
4235                numchan = 6;
4236                break;
4237        case 6:
4238                numchan = 10;
4239                break;
4240        case 7:
4241                numchan = 12;
4242                break;
4243        case 8:
4244                numchan = 16;
4245                break;
4246        }
4247        rdev->mc.vram_width = numchan * chansize;
4248        /* Could aper size report 0 ? */
4249        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4250        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4251        /* size in MB on si */
4252        tmp = RREG32(CONFIG_MEMSIZE);
4253        /* some boards may have garbage in the upper 16 bits */
4254        if (tmp & 0xffff0000) {
4255                DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4256                if (tmp & 0xffff)
4257                        tmp &= 0xffff;
4258        }
4259        rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4260        rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4261        rdev->mc.visible_vram_size = rdev->mc.aper_size;
4262        si_vram_gtt_location(rdev, &rdev->mc);
4263        radeon_update_bandwidth_info(rdev);
4264
4265        return 0;
4266}
4267
4268/*
4269 * GART
4270 */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Writes back the HDP cache first so page-table updates are visible
 * to the GPU, then invalidates the TLB for VM context 0.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4279
/**
 * si_pcie_gart_enable - bring up the GART and the 16 VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * sets up VM context 0 to cover the GTT aperture, restores the saved
 * page-table base addresses for contexts 1-15, enables fault handling
 * on those contexts, and flushes the TLBs.  Returns 0 on success or a
 * negative error code if the table cannot be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 redirect to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	/* contexts 0-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4367
4368static void si_pcie_gart_disable(struct radeon_device *rdev)
4369{
4370        unsigned i;
4371
4372        for (i = 1; i < 16; ++i) {
4373                uint32_t reg;
4374                if (i < 8)
4375                        reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4376                else
4377                        reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4378                rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4379        }
4380
4381        /* Disable all tables */
4382        WREG32(VM_CONTEXT0_CNTL, 0);
4383        WREG32(VM_CONTEXT1_CNTL, 0);
4384        /* Setup TLB control */
4385        WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4386               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4387        /* Setup L2 cache */
4388        WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4389               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4390               EFFECTIVE_L2_QUEUE_SIZE(7) |
4391               CONTEXT1_IDENTITY_ACCESS_MODE(1));
4392        WREG32(VM_L2_CNTL2, 0);
4393        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4394               L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4395        radeon_gart_table_vram_unpin(rdev);
4396}
4397
/**
 * si_pcie_gart_fini - tear down the PCIE GART (SI)
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then frees the VRAM-resident page table
 * and the generic GART bookkeeping.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4404
4405/* vm parser */
/**
 * si_vm_reg_valid - check whether a VM command stream may touch a register
 * @reg: register byte offset
 *
 * VM IBs are only lightly parsed, so writes are restricted to registers
 * that are safe for userspace: the context register range, the shader
 * register range, and an explicit allowlist of config registers.
 *
 * Returns true if the register is allowed, false otherwise (logged).
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* shader regs are also fine */
	if (reg >= 0xB000 && reg < 0xC000)
		return true;

	/* check config regs against the explicit allowlist */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
	case TA_CS_BC_BASE_ADDR:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4451
/**
 * si_vm_packet3_ce_check - validate a type-3 packet for the constant engine
 * @rdev: radeon_device pointer (unused here, kept for checker symmetry)
 * @ib: ib dwords
 * @pkt: decoded packet header (idx/opcode/count)
 *
 * Only the allowlisted CE packets below are accepted in a VM IB;
 * everything else is rejected.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4473
/**
 * si_vm_packet3_cp_dma_check - validate register operands of a CP_DMA packet
 * @ib: ib dwords
 * @idx: index of the first dword after the CP_DMA header
 *
 * When the src and/or dst address space of the DMA is a register
 * (PACKET3_CP_DMA_CMD_SAS / _DAS set in the COMMAND dword), every register
 * that would be touched must pass si_vm_reg_valid().
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
{
	u32 start_reg, reg, i;
	u32 command = ib[idx + 4];	/* CP_DMA COMMAND dword */
	u32 info = ib[idx + 1];		/* CP_DMA address-space info dword */
	u32 idx_value = ib[idx];	/* src address low / register dword offset */
	if (command & PACKET3_CP_DMA_CMD_SAS) {
		/* src address space is register */
		/* bits [30:29] of INFO select the src addressing mode;
		 * presumably 0 means register - TODO confirm against the
		 * PM4 packet spec */
		if (((info & 0x60000000) >> 29) == 0) {
			start_reg = idx_value << 2;
			if (command & PACKET3_CP_DMA_CMD_SAIC) {
				/* SAIC: src address does not increment,
				 * so only a single register is read */
				reg = start_reg;
				if (!si_vm_reg_valid(reg)) {
					DRM_ERROR("CP DMA Bad SRC register\n");
					return -EINVAL;
				}
			} else {
				/* NOTE(review): the loop bound is the DMA
				 * byte count (COMMAND bits [20:0]) while
				 * each step advances a full dword, so this
				 * checks past the real end - over-strict
				 * rather than unsafe */
				for (i = 0; i < (command & 0x1fffff); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				}
			}
		}
	}
	if (command & PACKET3_CP_DMA_CMD_DAS) {
		/* dst address space is register */
		/* bits [21:20] of INFO select the dst addressing mode;
		 * presumably 0 means register - TODO confirm */
		if (((info & 0x00300000) >> 20) == 0) {
			start_reg = ib[idx + 2];
			if (command & PACKET3_CP_DMA_CMD_DAIC) {
				/* DAIC: dst address does not increment,
				 * so only a single register is written */
				reg = start_reg;
				if (!si_vm_reg_valid(reg)) {
					DRM_ERROR("CP DMA Bad DST register\n");
					return -EINVAL;
				}
			} else {
				/* same over-strict byte-count bound as the
				 * src path above */
				for (i = 0; i < (command & 0x1fffff); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				}
			}
		}
	}
	return 0;
}
4524
/**
 * si_vm_packet3_gfx_check - validate a type-3 packet for the GFX ring
 * @rdev: radeon_device pointer
 * @ib: ib dwords
 * @pkt: decoded packet header (idx/opcode/count)
 *
 * Packets in the long allowlist are accepted as-is; packets that can
 * write registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) have their register operands validated with
 * si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* when bits [11:8] of dword 1 are clear, dword 4 is treated
		 * as a register dword offset and must be validated */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* when bits [11:8] of dword 1 are clear, the destination is
		 * a register range starting at dword 2 */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register write mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* consecutive registers: count - 2 data dwords */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: write target is a register (dword 6) */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: destination is a register (dword 4) */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the packet against the config reg window,
		 * then validate every register it would write */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4642
/**
 * si_vm_packet3_compute_check - validate a type-3 packet for a compute ring
 * @rdev: radeon_device pointer
 * @ib: ib dwords
 * @pkt: decoded packet header (idx/opcode/count)
 *
 * Same structure as si_vm_packet3_gfx_check() but with the smaller
 * compute allowlist (no draw/index packets). Register-writing packets
 * have their operands validated with si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* bits [11:8] clear: dword 4 is a register dword offset */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* bits [11:8] clear: destination is a register range */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register write mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* consecutive registers: count - 2 data dwords */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: write target is a register (dword 6) */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: destination is a register (dword 4) */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4730
/**
 * si_ib_parse - validate all packets of a VM IB
 * @rdev: radeon_device pointer
 * @ib: ib to parse
 *
 * Walks the IB dword by dword, decoding each packet header and
 * dispatching type-3 packets to the CE, GFX or compute checker depending
 * on whether the IB is a const IB and which ring it targets. Packet0 and
 * unknown packet types are rejected outright. On the first error the
 * whole IB is dumped with a marker, and parsing stops.
 *
 * Returns 0 if the IB is acceptable, -EINVAL otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* raw register writes are never allowed in VM IBs */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a single-dword filler */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header + (count + 1) payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret) {
			/* dump the whole IB with the marker at idx;
			 * NOTE(review): for type-3 failures idx has already
			 * been advanced past the bad packet, so the marker
			 * lands on the following dword */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			break;
		}
	} while (idx < ib->length_dw);

	return ret;
}
4789
4790/*
4791 * vm
4792 */
/**
 * si_vm_init - initialize VM manager parameters (SI)
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 hardware VM contexts (context 0 is used for the GART,
 * 1-15 for client VMs - see si_pcie_gart_disable()). No extra VRAM base
 * offset is applied.
 *
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4802
/**
 * si_vm_fini - tear down VM manager state (SI)
 * @rdev: radeon_device pointer
 *
 * Nothing to release on SI; present to satisfy the asic callback table.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4806
4807/**
4808 * si_vm_decode_fault - print human readable fault info
4809 *
4810 * @rdev: radeon_device pointer
4811 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4812 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4813 *
4814 * Print human readable fault information (SI).
4815 */
4816static void si_vm_decode_fault(struct radeon_device *rdev,
4817                               u32 status, u32 addr)
4818{
4819        u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4820        u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4821        u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4822        char *block;
4823
4824        if (rdev->family == CHIP_TAHITI) {
4825                switch (mc_id) {
4826                case 160:
4827                case 144:
4828                case 96:
4829                case 80:
4830                case 224:
4831                case 208:
4832                case 32:
4833                case 16:
4834                        block = "CB";
4835                        break;
4836                case 161:
4837                case 145:
4838                case 97:
4839                case 81:
4840                case 225:
4841                case 209:
4842                case 33:
4843                case 17:
4844                        block = "CB_FMASK";
4845                        break;
4846                case 162:
4847                case 146:
4848                case 98:
4849                case 82:
4850                case 226:
4851                case 210:
4852                case 34:
4853                case 18:
4854                        block = "CB_CMASK";
4855                        break;
4856                case 163:
4857                case 147:
4858                case 99:
4859                case 83:
4860                case 227:
4861                case 211:
4862                case 35:
4863                case 19:
4864                        block = "CB_IMMED";
4865                        break;
4866                case 164:
4867                case 148:
4868                case 100:
4869                case 84:
4870                case 228:
4871                case 212:
4872                case 36:
4873                case 20:
4874                        block = "DB";
4875                        break;
4876                case 165:
4877                case 149:
4878                case 101:
4879                case 85:
4880                case 229:
4881                case 213:
4882                case 37:
4883                case 21:
4884                        block = "DB_HTILE";
4885                        break;
4886                case 167:
4887                case 151:
4888                case 103:
4889                case 87:
4890                case 231:
4891                case 215:
4892                case 39:
4893                case 23:
4894                        block = "DB_STEN";
4895                        break;
4896                case 72:
4897                case 68:
4898                case 64:
4899                case 8:
4900                case 4:
4901                case 0:
4902                case 136:
4903                case 132:
4904                case 128:
4905                case 200:
4906                case 196:
4907                case 192:
4908                        block = "TC";
4909                        break;
4910                case 112:
4911                case 48:
4912                        block = "CP";
4913                        break;
4914                case 49:
4915                case 177:
4916                case 50:
4917                case 178:
4918                        block = "SH";
4919                        break;
4920                case 53:
4921                case 190:
4922                        block = "VGT";
4923                        break;
4924                case 117:
4925                        block = "IH";
4926                        break;
4927                case 51:
4928                case 115:
4929                        block = "RLC";
4930                        break;
4931                case 119:
4932                case 183:
4933                        block = "DMA0";
4934                        break;
4935                case 61:
4936                        block = "DMA1";
4937                        break;
4938                case 248:
4939                case 120:
4940                        block = "HDP";
4941                        break;
4942                default:
4943                        block = "unknown";
4944                        break;
4945                }
4946        } else {
4947                switch (mc_id) {
4948                case 32:
4949                case 16:
4950                case 96:
4951                case 80:
4952                case 160:
4953                case 144:
4954                case 224:
4955                case 208:
4956                        block = "CB";
4957                        break;
4958                case 33:
4959                case 17:
4960                case 97:
4961                case 81:
4962                case 161:
4963                case 145:
4964                case 225:
4965                case 209:
4966                        block = "CB_FMASK";
4967                        break;
4968                case 34:
4969                case 18:
4970                case 98:
4971                case 82:
4972                case 162:
4973                case 146:
4974                case 226:
4975                case 210:
4976                        block = "CB_CMASK";
4977                        break;
4978                case 35:
4979                case 19:
4980                case 99:
4981                case 83:
4982                case 163:
4983                case 147:
4984                case 227:
4985                case 211:
4986                        block = "CB_IMMED";
4987                        break;
4988                case 36:
4989                case 20:
4990                case 100:
4991                case 84:
4992                case 164:
4993                case 148:
4994                case 228:
4995                case 212:
4996                        block = "DB";
4997                        break;
4998                case 37:
4999                case 21:
5000                case 101:
5001                case 85:
5002                case 165:
5003                case 149:
5004                case 229:
5005                case 213:
5006                        block = "DB_HTILE";
5007                        break;
5008                case 39:
5009                case 23:
5010                case 103:
5011                case 87:
5012                case 167:
5013                case 151:
5014                case 231:
5015                case 215:
5016                        block = "DB_STEN";
5017                        break;
5018                case 72:
5019                case 68:
5020                case 8:
5021                case 4:
5022                case 136:
5023                case 132:
5024                case 200:
5025                case 196:
5026                        block = "TC";
5027                        break;
5028                case 112:
5029                case 48:
5030                        block = "CP";
5031                        break;
5032                case 49:
5033                case 177:
5034                case 50:
5035                case 178:
5036                        block = "SH";
5037                        break;
5038                case 53:
5039                        block = "VGT";
5040                        break;
5041                case 117:
5042                        block = "IH";
5043                        break;
5044                case 51:
5045                case 115:
5046                        block = "RLC";
5047                        break;
5048                case 119:
5049                case 183:
5050                        block = "DMA0";
5051                        break;
5052                case 61:
5053                        block = "DMA1";
5054                        break;
5055                case 248:
5056                case 120:
5057                        block = "HDP";
5058                        break;
5059                default:
5060                        block = "unknown";
5061                        break;
5062                }
5063        }
5064
5065        printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5066               protections, vmid, addr,
5067               (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5068               block, mc_id);
5069}
5070
/**
 * si_vm_flush - update a VM's page directory and flush its TLB (SI)
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id to flush (0-15)
 * @pd_addr: new page directory base address
 *
 * Emits WRITE_DATA packets that (1) program the per-VM page table base
 * register, (2) flush the HDP cache, and (3) request a TLB invalidate
 * for @vm_id; then waits for the invalidate to complete and syncs the
 * PFP with the ME. The packet order here is deliberate - do not reorder.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	/* ENGINE_SEL(1) - presumably the PFP engine; TODO confirm */
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5119
5120/*
5121 *  Power and clock gating
5122 */
5123static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5124{
5125        int i;
5126
5127        for (i = 0; i < rdev->usec_timeout; i++) {
5128                if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5129                        break;
5130                udelay(1);
5131        }
5132
5133        for (i = 0; i < rdev->usec_timeout; i++) {
5134                if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5135                        break;
5136                udelay(1);
5137        }
5138}
5139
/**
 * si_enable_gui_idle_interrupt - toggle context busy/empty interrupts
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupts
 *
 * Sets or clears the CNTX busy/empty interrupt enables on CP ring 0.
 * When disabling, also waits until the RLC status reports exactly
 * GFX_CLOCK_STATUS | GFX_POWER_STATUS (i.e. the RLC is no longer busy
 * and not in LS) before returning.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register - value unused; presumably forces the
		 * write above to land / wakes the gfx block - TODO confirm */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5165
5166static void si_set_uvd_dcm(struct radeon_device *rdev,
5167                           bool sw_mode)
5168{
5169        u32 tmp, tmp2;
5170
5171        tmp = RREG32(UVD_CGC_CTRL);
5172        tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5173        tmp |= DCM | CG_DT(1) | CLK_OD(4);
5174
5175        if (sw_mode) {
5176                tmp &= ~0x7ffff800;
5177                tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5178        } else {
5179                tmp |= 0x7ffff800;
5180                tmp2 = 0;
5181        }
5182
5183        WREG32(UVD_CGC_CTRL, tmp);
5184        WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5185}
5186
/**
 * si_init_uvd_internal_cg - initialize UVD internal clock gating
 * @rdev: radeon_device pointer
 *
 * Puts the UVD DCM into hardware mode. hw_mode is hard-coded to true,
 * so the else branch is currently dead; it documents how DCM would be
 * disabled entirely.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
5199
5200static u32 si_halt_rlc(struct radeon_device *rdev)
5201{
5202        u32 data, orig;
5203
5204        orig = data = RREG32(RLC_CNTL);
5205
5206        if (data & RLC_ENABLE) {
5207                data &= ~RLC_ENABLE;
5208                WREG32(RLC_CNTL, data);
5209
5210                si_wait_for_rlc_serdes(rdev);
5211        }
5212
5213        return orig;
5214}
5215
5216static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5217{
5218        u32 tmp;
5219
5220        tmp = RREG32(RLC_CNTL);
5221        if (tmp != rlc)
5222                WREG32(RLC_CNTL, rlc);
5223}
5224
5225static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5226{
5227        u32 data, orig;
5228
5229        orig = data = RREG32(DMA_PG);
5230        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5231                data |= PG_CNTL_ENABLE;
5232        else
5233                data &= ~PG_CNTL_ENABLE;
5234        if (orig != data)
5235                WREG32(DMA_PG, data);
5236}
5237
5238static void si_init_dma_pg(struct radeon_device *rdev)
5239{
5240        u32 tmp;
5241
5242        WREG32(DMA_PGFSM_WRITE,  0x00002000);
5243        WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5244
5245        for (tmp = 0; tmp < 5; tmp++)
5246                WREG32(DMA_PGFSM_WRITE, 0);
5247}
5248
/* Enable/disable coarse grain GFX power gating through the RLC.
 * Enable: program the RLC_TTOP_D delay fields, set GFX_PG_ENABLE and
 * turn on RLC auto power gating.  Disable: clear auto power gating;
 * the trailing DB_RENDER_CONTROL read discards its result — presumably
 * a flush/posting read — NOTE(review): confirm against the SI PG docs.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* RLC power up/down delay fields */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* value intentionally unused */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5273
/* One-time GFX power gating init: point the RLC at the save/restore
 * and clear-state buffers and program the auto power gating idle
 * threshold in RLC_AUTO_PG_CTRL.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* take save/restore data from the buffer programmed above */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold before auto PG triggers — units presumed to be
	 * RLC ticks, NOTE(review): confirm */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5293
5294static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5295{
5296        u32 mask = 0, tmp, tmp1;
5297        int i;
5298
5299        si_select_se_sh(rdev, se, sh);
5300        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5301        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5302        si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5303
5304        tmp &= 0xffff0000;
5305
5306        tmp |= tmp1;
5307        tmp >>= 16;
5308
5309        for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5310                mask <<= 1;
5311                mask |= 1;
5312        }
5313
5314        return (~tmp) & mask;
5315}
5316
5317static void si_init_ao_cu_mask(struct radeon_device *rdev)
5318{
5319        u32 i, j, k, active_cu_number = 0;
5320        u32 mask, counter, cu_bitmap;
5321        u32 tmp = 0;
5322
5323        for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5324                for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5325                        mask = 1;
5326                        cu_bitmap = 0;
5327                        counter  = 0;
5328                        for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5329                                if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5330                                        if (counter < 2)
5331                                                cu_bitmap |= mask;
5332                                        counter++;
5333                                }
5334                                mask <<= 1;
5335                        }
5336
5337                        active_cu_number += counter;
5338                        tmp |= (cu_bitmap << (i * 16 + j * 8));
5339                }
5340        }
5341
5342        WREG32(RLC_PG_AO_CU_MASK, tmp);
5343
5344        tmp = RREG32(RLC_MAX_PG_CU);
5345        tmp &= ~MAX_PU_CU_MASK;
5346        tmp |= MAX_PU_CU(active_cu_number);
5347        WREG32(RLC_MAX_PG_CU, tmp);
5348}
5349
/* Enable/disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) for the GFX block.  The enable path halts the
 * RLC, broadcasts a serdes write to all SE/SH units, waits for the
 * serdes to go idle, then restarts the RLC before setting the enable
 * bits — the sequence order is required by the hardware.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to every SE/SH */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the RLC_CNTL value saved by si_halt_rlc() */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads, presumably to flush pending CB clock
		 * gating state — NOTE(review): confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5389
/* Enable/disable medium grain clock gating (MGCG) and, when supported,
 * CP memory light sleep.  Both paths finish by pushing the new state
 * out to all SE/SH units via an RLC serdes broadcast with the RLC
 * halted; statement order is required by the hardware.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low six MGCG override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to every SE/SH */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		/* restore the RLC_CNTL value saved by si_halt_rlc() */
		si_update_rlc(rdev, tmp);
	} else {
		/* set the override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5445
5446static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5447                               bool enable)
5448{
5449        u32 orig, data, tmp;
5450
5451        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5452                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5453                tmp |= 0x3fff;
5454                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5455
5456                orig = data = RREG32(UVD_CGC_CTRL);
5457                data |= DCM;
5458                if (orig != data)
5459                        WREG32(UVD_CGC_CTRL, data);
5460
5461                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5462                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5463        } else {
5464                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5465                tmp &= ~0x3fff;
5466                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5467
5468                orig = data = RREG32(UVD_CGC_CTRL);
5469                data &= ~DCM;
5470                if (orig != data)
5471                        WREG32(UVD_CGC_CTRL, data);
5472
5473                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5474                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5475        }
5476}
5477
/* Memory controller / VM clock gating control registers.  Both
 * si_enable_mc_ls() and si_enable_mc_mgcg() iterate this table,
 * toggling the MC_LS_ENABLE / MC_CG_ENABLE bits in each register.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5490
5491static void si_enable_mc_ls(struct radeon_device *rdev,
5492                            bool enable)
5493{
5494        int i;
5495        u32 orig, data;
5496
5497        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5498                orig = data = RREG32(mc_cg_registers[i]);
5499                if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5500                        data |= MC_LS_ENABLE;
5501                else
5502                        data &= ~MC_LS_ENABLE;
5503                if (data != orig)
5504                        WREG32(mc_cg_registers[i], data);
5505        }
5506}
5507
5508static void si_enable_mc_mgcg(struct radeon_device *rdev,
5509                               bool enable)
5510{
5511        int i;
5512        u32 orig, data;
5513
5514        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5515                orig = data = RREG32(mc_cg_registers[i]);
5516                if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5517                        data |= MC_CG_ENABLE;
5518                else
5519                        data &= ~MC_CG_ENABLE;
5520                if (data != orig)
5521                        WREG32(mc_cg_registers[i], data);
5522        }
5523}
5524
5525static void si_enable_dma_mgcg(struct radeon_device *rdev,
5526                               bool enable)
5527{
5528        u32 orig, data, offset;
5529        int i;
5530
5531        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5532                for (i = 0; i < 2; i++) {
5533                        if (i == 0)
5534                                offset = DMA0_REGISTER_OFFSET;
5535                        else
5536                                offset = DMA1_REGISTER_OFFSET;
5537                        orig = data = RREG32(DMA_POWER_CNTL + offset);
5538                        data &= ~MEM_POWER_OVERRIDE;
5539                        if (data != orig)
5540                                WREG32(DMA_POWER_CNTL + offset, data);
5541                        WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5542                }
5543        } else {
5544                for (i = 0; i < 2; i++) {
5545                        if (i == 0)
5546                                offset = DMA0_REGISTER_OFFSET;
5547                        else
5548                                offset = DMA1_REGISTER_OFFSET;
5549                        orig = data = RREG32(DMA_POWER_CNTL + offset);
5550                        data |= MEM_POWER_OVERRIDE;
5551                        if (data != orig)
5552                                WREG32(DMA_POWER_CNTL + offset, data);
5553
5554                        orig = data = RREG32(DMA_CLK_CTRL + offset);
5555                        data = 0xff000000;
5556                        if (data != orig)
5557                                WREG32(DMA_CLK_CTRL + offset, data);
5558                }
5559        }
5560}
5561
5562static void si_enable_bif_mgls(struct radeon_device *rdev,
5563                               bool enable)
5564{
5565        u32 orig, data;
5566
5567        orig = data = RREG32_PCIE(PCIE_CNTL2);
5568
5569        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5570                data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5571                        REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5572        else
5573                data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5574                          REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5575
5576        if (orig != data)
5577                WREG32_PCIE(PCIE_CNTL2, data);
5578}
5579
5580static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5581                               bool enable)
5582{
5583        u32 orig, data;
5584
5585        orig = data = RREG32(HDP_HOST_PATH_CNTL);
5586
5587        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5588                data &= ~CLOCK_GATING_DIS;
5589        else
5590                data |= CLOCK_GATING_DIS;
5591
5592        if (orig != data)
5593                WREG32(HDP_HOST_PATH_CNTL, data);
5594}
5595
5596static void si_enable_hdp_ls(struct radeon_device *rdev,
5597                             bool enable)
5598{
5599        u32 orig, data;
5600
5601        orig = data = RREG32(HDP_MEM_POWER_LS);
5602
5603        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5604                data |= HDP_LS_ENABLE;
5605        else
5606                data &= ~HDP_LS_ENABLE;
5607
5608        if (orig != data)
5609                WREG32(HDP_MEM_POWER_LS, data);
5610}
5611
5612static void si_update_cg(struct radeon_device *rdev,
5613                         u32 block, bool enable)
5614{
5615        if (block & RADEON_CG_BLOCK_GFX) {
5616                si_enable_gui_idle_interrupt(rdev, false);
5617                /* order matters! */
5618                if (enable) {
5619                        si_enable_mgcg(rdev, true);
5620                        si_enable_cgcg(rdev, true);
5621                } else {
5622                        si_enable_cgcg(rdev, false);
5623                        si_enable_mgcg(rdev, false);
5624                }
5625                si_enable_gui_idle_interrupt(rdev, true);
5626        }
5627
5628        if (block & RADEON_CG_BLOCK_MC) {
5629                si_enable_mc_mgcg(rdev, enable);
5630                si_enable_mc_ls(rdev, enable);
5631        }
5632
5633        if (block & RADEON_CG_BLOCK_SDMA) {
5634                si_enable_dma_mgcg(rdev, enable);
5635        }
5636
5637        if (block & RADEON_CG_BLOCK_BIF) {
5638                si_enable_bif_mgls(rdev, enable);
5639        }
5640
5641        if (block & RADEON_CG_BLOCK_UVD) {
5642                if (rdev->has_uvd) {
5643                        si_enable_uvd_mgcg(rdev, enable);
5644                }
5645        }
5646
5647        if (block & RADEON_CG_BLOCK_HDP) {
5648                si_enable_hdp_mgcg(rdev, enable);
5649                si_enable_hdp_ls(rdev, enable);
5650        }
5651}
5652
5653static void si_init_cg(struct radeon_device *rdev)
5654{
5655        si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5656                            RADEON_CG_BLOCK_MC |
5657                            RADEON_CG_BLOCK_SDMA |
5658                            RADEON_CG_BLOCK_BIF |
5659                            RADEON_CG_BLOCK_HDP), true);
5660        if (rdev->has_uvd) {
5661                si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5662                si_init_uvd_internal_cg(rdev);
5663        }
5664}
5665
5666static void si_fini_cg(struct radeon_device *rdev)
5667{
5668        if (rdev->has_uvd) {
5669                si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5670        }
5671        si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5672                            RADEON_CG_BLOCK_MC |
5673                            RADEON_CG_BLOCK_SDMA |
5674                            RADEON_CG_BLOCK_BIF |
5675                            RADEON_CG_BLOCK_HDP), false);
5676}
5677
5678u32 si_get_csb_size(struct radeon_device *rdev)
5679{
5680        u32 count = 0;
5681        const struct cs_section_def *sect = NULL;
5682        const struct cs_extent_def *ext = NULL;
5683
5684        if (rdev->rlc.cs_data == NULL)
5685                return 0;
5686
5687        /* begin clear state */
5688        count += 2;
5689        /* context control state */
5690        count += 3;
5691
5692        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5693                for (ext = sect->section; ext->extent != NULL; ++ext) {
5694                        if (sect->id == SECT_CONTEXT)
5695                                count += 2 + ext->reg_count;
5696                        else
5697                                return 0;
5698                }
5699        }
5700        /* pa_sc_raster_config */
5701        count += 3;
5702        /* end clear state */
5703        count += 2;
5704        /* clear state */
5705        count += 2;
5706
5707        return count;
5708}
5709
/* Emit the clear-state indirect buffer into @buffer.  The stream layout
 * must exactly match the dword count from si_get_csb_size():
 * preamble begin, context control, SECT_CONTEXT register extents,
 * PA_SC_RASTER_CONFIG (per-ASIC value), preamble end, clear state.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per register extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported */
				return;
			}
		}
	}

	/* per-ASIC raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5769
/* Initialize power gating.  When PG is supported, set up SDMA PG and
 * the always-on CU mask, then either do full GFX PG init or just point
 * the RLC at the save/restore and clear-state buffers; finish by
 * enabling the supported PG features.  With no PG flags, only the RLC
 * buffer addresses are programmed (the RLC still needs them).
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			/* addresses in units of 256 bytes */
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5790
/* Disable the power gating features enabled by si_init_pg(). */
static void si_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		si_enable_dma_pg(rdev, false);
		si_enable_gfx_cgpg(rdev, false);
	}
}
5798
5799/*
5800 * RLC
5801 */
/* Pulse the RLC soft reset bit in GRBM_SOFT_RESET: assert, wait,
 * deassert, wait.
 */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5813
/* Stop the RLC: clear RLC_CNTL, mask the GUI idle interrupt and wait
 * for the serdes units to go idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5822
/* Start the RLC and re-enable the GUI idle interrupt; the short delay
 * gives the RLC time to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5831
5832static bool si_lbpw_supported(struct radeon_device *rdev)
5833{
5834        u32 tmp;
5835
5836        /* Enable LBPW only for DDR3 */
5837        tmp = RREG32(MC_SEQ_MISC0);
5838        if ((tmp & 0xF0000000) == 0xB0000000)
5839                return true;
5840        return false;
5841}
5842
5843static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5844{
5845        u32 tmp;
5846
5847        tmp = RREG32(RLC_LB_CNTL);
5848        if (enable)
5849                tmp |= LOAD_BALANCE_ENABLE;
5850        else
5851                tmp &= ~LOAD_BALANCE_ENABLE;
5852        WREG32(RLC_LB_CNTL, tmp);
5853
5854        if (!enable) {
5855                si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5856                WREG32(SPI_LB_CU_MASK, 0x00ff);
5857        }
5858}
5859
/* Stop/reset the RLC, initialize PG and CG, load the RLC microcode
 * word by word through the UCODE_ADDR/UCODE_DATA pair, and restart
 * the RLC.  Returns -EINVAL when no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* reset ring list / load balancing state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: has a header, payload little endian */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy headerless firmware: fixed size, big endian */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5914
5915static void si_enable_interrupts(struct radeon_device *rdev)
5916{
5917        u32 ih_cntl = RREG32(IH_CNTL);
5918        u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5919
5920        ih_cntl |= ENABLE_INTR;
5921        ih_rb_cntl |= IH_RB_ENABLE;
5922        WREG32(IH_CNTL, ih_cntl);
5923        WREG32(IH_RB_CNTL, ih_rb_cntl);
5924        rdev->ih.enabled = true;
5925}
5926
5927static void si_disable_interrupts(struct radeon_device *rdev)
5928{
5929        u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5930        u32 ih_cntl = RREG32(IH_CNTL);
5931
5932        ih_rb_cntl &= ~IH_RB_ENABLE;
5933        ih_cntl &= ~ENABLE_INTR;
5934        WREG32(IH_RB_CNTL, ih_rb_cntl);
5935        WREG32(IH_CNTL, ih_cntl);
5936        /* set rptr, wptr to 0 */
5937        WREG32(IH_RB_RPTR, 0);
5938        WREG32(IH_RB_WPTR, 0);
5939        rdev->ih.enabled = false;
5940        rdev->ih.rptr = 0;
5941}
5942
/* Force every interrupt source (CP rings, both DMA engines, GRBM/SRBM,
 * all CRTC vblank/pageflip sources, DAC autodetect and HPD pins) into
 * the disabled state.  HPD polarity bits are preserved.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	int i;
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(SRBM_INT_CNTL, 0);
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(INT_MASK + crtc_offsets[i], 0);
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* clear all HPD enable bits, keeping only the polarity */
		for (i = 0; i < 6; i++)
			WREG32_AND(DC_HPDx_INT_CONTROL(i),
				   DC_HPDx_INT_POLARITY);
	}
}
5972
/* Initialize interrupt handling: allocate the IH ring, bring up the
 * RLC, program the IH ring buffer (base, size, writeback) and IH
 * control, force all interrupt sources disabled, then enable the IH.
 * Returns 0 on success or a negative error code (ring allocation or
 * RLC resume failure).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base in units of 256 bytes; size field is log2 of dwords */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6043
6044/* The order we write back each register here is important */
/* Program the interrupt enable state from rdev->irq: CP rings, both
 * DMA engines, thermal, per-CRTC vblank/pageflip and HPD sources.
 * Returns -EINVAL when no IRQ handler is installed, 0 otherwise.
 * The register write order below is significant (see comment above).
 */
int si_irq_set(struct radeon_device *rdev)
{
	int i;
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current enables with the sources we manage cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	/* vblank: enabled when either the vblank interrupt or a pageflip
	 * is requested for the crtc */
	for (i = 0; i < rdev->num_crtc; i++) {
		radeon_irq_kms_set_irq_n_enabled(
		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
		    rdev->irq.crtc_vblank_int[i] ||
		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
	}

	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);

	if (!ASIC_IS_NODCE(rdev)) {
		for (i = 0; i < 6; i++) {
			radeon_irq_kms_set_irq_n_enabled(
			    rdev, DC_HPDx_INT_CONTROL(i),
			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
			    rdev->irq.hpd[i], "HPD", i);
		}
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6138
/* The order we write back each register here is important */
static inline void si_irq_ack(struct radeon_device *rdev)
{
	int i, j;
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;

	/* Nothing to acknowledge on parts without display hardware */
	if (ASIC_IS_NODCE(rdev))
		return;

	/* Latch the interrupt status of all 6 display heads; pageflip
	 * status registers only exist for the CRTCs actually present.
	 */
	for (i = 0; i < 6; i++) {
		disp_int[i] = RREG32(si_disp_int_status[i]);
		if (i < rdev->num_crtc)
			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
	}

	/* We write back each interrupt register in pairs of two */
	for (i = 0; i < rdev->num_crtc; i += 2) {
		/* First ack the pageflips for the crtc pair... */
		for (j = i; j < (i + 2); j++) {
			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
				       GRPH_PFLIP_INT_CLEAR);
		}

		/* ...then the vblank/vline events for the same pair */
		for (j = i; j < (i + 2); j++) {
			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
				WREG32(VBLANK_STATUS + crtc_offsets[j],
				       VBLANK_ACK);
			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
				WREG32(VLINE_STATUS + crtc_offsets[j],
				       VLINE_ACK);
		}
	}

	/* Ack pending hotplug (connect/disconnect) interrupts */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
	}

	/* Ack pending HPD RX interrupts (serviced by the DP work handler) */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
	}
}
6183
/* Fully shut off interrupt delivery: disable the controller, give any
 * in-flight interrupt time to land, ack whatever is still pending, then
 * mask every individual source.  The sequence must not be reordered.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6192
/* Suspend-time interrupt teardown: disable irqs first so the RLC is not
 * stopped while interrupt processing may still be running.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6198
/* Final interrupt teardown: quiesce the hardware, then free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6204
/* Fetch the current IH ring write pointer.  Prefer the writeback page
 * (cheaper than an MMIO read) when writeback is enabled.  On a ring
 * overflow, resynchronize the read pointer past the oldest vector that
 * was not overwritten and clear the hardware overflow flag.
 */
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happens, start parsing
		 * interrupts from the last not-overwritten vector
		 * (wptr + 16).  Hopefully this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
6229
/*        SI IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
/**
 * si_irq_process - drain and dispatch pending IH ring vectors
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from rptr to wptr, dispatching each 128-bit vector
 * (vblank/vline, pageflip, hotplug, VM faults, fence/trap events,
 * thermal, ...).  Deferred work (hotplug, DP, thermal) is scheduled
 * once the ring has been drained.
 *
 * Returns IRQ_NONE when interrupts are disabled, the device is shutting
 * down, or another caller is already processing the ring; IRQ_HANDLED
 * otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 crtc_idx, hpd_idx;
	u32 mask;
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_thermal = false;
	u32 status, addr;
	const char *event_name;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
		case 2: /* D2 vblank/vline */
		case 3: /* D3 vblank/vline */
		case 4: /* D4 vblank/vline */
		case 5: /* D5 vblank/vline */
		case 6: /* D6 vblank/vline */
			crtc_idx = src_id - 1;

			if (src_data == 0) { /* vblank */
				mask = LB_D1_VBLANK_INTERRUPT;
				event_name = "vblank";

				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
					drm_handle_vblank(rdev->ddev, crtc_idx);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
					radeon_crtc_handle_vblank(rdev,
								  crtc_idx);
				}

			} else if (src_data == 1) { /* vline */
				mask = LB_D1_VLINE_INTERRUPT;
				event_name = "vline";
			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			if (!(disp_int[crtc_idx] & mask)) {
				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
					  crtc_idx + 1, event_name);
			}

			disp_int[crtc_idx] &= ~mask;
			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);

			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			/* pageflip source ids are even, starting at 8 */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			/* src_data 0-5 = HPD1-6, 6-11 = the matching RX lines */
			if (src_data <= 5) {
				hpd_idx = src_data;
				mask = DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				event_name = "HPD";

			} else if (src_data <= 11) {
				hpd_idx = src_data - 6;
				mask = DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				event_name = "HPD_RX";

			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			if (!(disp_int[hpd_idx] & mask))
				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

			disp_int[hpd_idx] &= ~mask;
			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
			break;
		case 96:
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			/* a zero fault is a stale/spurious event - nothing to decode */
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	if (queue_dp)
		schedule_work(&rdev->dp_work);
	if (queue_hotplug)
		schedule_delayed_work(&rdev->hotplug_work, 0);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6450
6451/*
6452 * startup/shutdown callbacks
6453 */
6454static void si_uvd_init(struct radeon_device *rdev)
6455{
6456        int r;
6457
6458        if (!rdev->has_uvd)
6459                return;
6460
6461        r = radeon_uvd_init(rdev);
6462        if (r) {
6463                dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6464                /*
6465                 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6466                 * to early fails uvd_v2_2_resume() and thus nothing happens
6467                 * there. So it is pointless to try to go through that code
6468                 * hence why we disable uvd here.
6469                 */
6470                rdev->has_uvd = false;
6471                return;
6472        }
6473        rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6474        r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6475}
6476
6477static void si_uvd_start(struct radeon_device *rdev)
6478{
6479        int r;
6480
6481        if (!rdev->has_uvd)
6482                return;
6483
6484        r = uvd_v2_2_resume(rdev);
6485        if (r) {
6486                dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6487                goto error;
6488        }
6489        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6490        if (r) {
6491                dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6492                goto error;
6493        }
6494        return;
6495
6496error:
6497        rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6498}
6499
6500static void si_uvd_resume(struct radeon_device *rdev)
6501{
6502        struct radeon_ring *ring;
6503        int r;
6504
6505        if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6506                return;
6507
6508        ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6509        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6510        if (r) {
6511                dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6512                return;
6513        }
6514        r = uvd_v1_0_init(rdev);
6515        if (r) {
6516                dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6517                return;
6518        }
6519}
6520
6521static void si_vce_init(struct radeon_device *rdev)
6522{
6523        int r;
6524
6525        if (!rdev->has_vce)
6526                return;
6527
6528        r = radeon_vce_init(rdev);
6529        if (r) {
6530                dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6531                /*
6532                 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6533                 * to early fails si_vce_start() and thus nothing happens
6534                 * there. So it is pointless to try to go through that code
6535                 * hence why we disable vce here.
6536                 */
6537                rdev->has_vce = false;
6538                return;
6539        }
6540        rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6541        r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6542        rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6543        r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6544}
6545
6546static void si_vce_start(struct radeon_device *rdev)
6547{
6548        int r;
6549
6550        if (!rdev->has_vce)
6551                return;
6552
6553        r = radeon_vce_resume(rdev);
6554        if (r) {
6555                dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6556                goto error;
6557        }
6558        r = vce_v1_0_resume(rdev);
6559        if (r) {
6560                dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6561                goto error;
6562        }
6563        r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6564        if (r) {
6565                dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6566                goto error;
6567        }
6568        r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6569        if (r) {
6570                dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6571                goto error;
6572        }
6573        return;
6574
6575error:
6576        rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6577        rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6578}
6579
6580static void si_vce_resume(struct radeon_device *rdev)
6581{
6582        struct radeon_ring *ring;
6583        int r;
6584
6585        if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6586                return;
6587
6588        ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6589        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6590        if (r) {
6591                dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6592                return;
6593        }
6594        ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6595        r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6596        if (r) {
6597                dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6598                return;
6599        }
6600        r = vce_v1_0_init(rdev);
6601        if (r) {
6602                dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6603                return;
6604        }
6605}
6606
/**
 * si_startup - program the ASIC into a fully functional state
 *
 * @rdev: radeon_device pointer
 *
 * Performs the ordered hardware bring-up used by both si_init() and
 * si_resume(): PCIe link/ASPM, MC and GART, RLC and writeback buffers,
 * fence rings, UVD/VCE, interrupts, CP and DMA rings, IB pool, VM
 * manager and audio.  The order of these steps matters.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* with DPM enabled, MC firmware was already loaded by the PM code */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring we will use:
	 * GFX, two compute CPs, two DMA engines, then UVD/VCE.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	si_uvd_start(rdev);
	si_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the command/DMA rings themselves */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	si_uvd_resume(rdev);
	si_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6765
6766int si_resume(struct radeon_device *rdev)
6767{
6768        int r;
6769
6770        /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6771         * posting will perform necessary task to bring back GPU into good
6772         * shape.
6773         */
6774        /* post card */
6775        atom_asic_init(rdev->mode_info.atom_context);
6776
6777        /* init golden registers */
6778        si_init_golden_registers(rdev);
6779
6780        if (rdev->pm.pm_method == PM_METHOD_DPM)
6781                radeon_pm_resume(rdev);
6782
6783        rdev->accel_working = true;
6784        r = si_startup(rdev);
6785        if (r) {
6786                DRM_ERROR("si startup failed on resume\n");
6787                rdev->accel_working = false;
6788                return r;
6789        }
6790
6791        return r;
6792
6793}
6794
/* Quiesce the ASIC for suspend.  The teardown order mirrors the reverse
 * of si_startup(): PM/audio/VM first, then the engines (CP, DMA, UVD,
 * VCE), then power/clock gating, interrupts, writeback and finally GART.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6815
6816/* Plan is to move initialization in that function and use
6817 * helper function so that radeon_device_init pretty much
6818 * do nothing more than calling asic specific function. This
6819 * should also allow to remove a bunch of callback function
6820 * like vram_info.
6821 */
6822int si_init(struct radeon_device *rdev)
6823{
6824        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6825        int r;
6826
6827        /* Read BIOS */
6828        if (!radeon_get_bios(rdev)) {
6829                if (ASIC_IS_AVIVO(rdev))
6830                        return -EINVAL;
6831        }
6832        /* Must be an ATOMBIOS */
6833        if (!rdev->is_atom_bios) {
6834                dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6835                return -EINVAL;
6836        }
6837        r = radeon_atombios_init(rdev);
6838        if (r)
6839                return r;
6840
6841        /* Post card if necessary */
6842        if (!radeon_card_posted(rdev)) {
6843                if (!rdev->bios) {
6844                        dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6845                        return -EINVAL;
6846                }
6847                DRM_INFO("GPU not posted. posting now...\n");
6848                atom_asic_init(rdev->mode_info.atom_context);
6849        }
6850        /* init golden registers */
6851        si_init_golden_registers(rdev);
6852        /* Initialize scratch registers */
6853        si_scratch_init(rdev);
6854        /* Initialize surface registers */
6855        radeon_surface_init(rdev);
6856        /* Initialize clocks */
6857        radeon_get_clock_info(rdev->ddev);
6858
6859        /* Fence driver */
6860        radeon_fence_driver_init(rdev);
6861
6862        /* initialize memory controller */
6863        r = si_mc_init(rdev);
6864        if (r)
6865                return r;
6866        /* Memory manager */
6867        r = radeon_bo_init(rdev);
6868        if (r)
6869                return r;
6870
6871        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6872            !rdev->rlc_fw || !rdev->mc_fw) {
6873                r = si_init_microcode(rdev);
6874                if (r) {
6875                        DRM_ERROR("Failed to load firmware!\n");
6876                        return r;
6877                }
6878        }
6879
6880        /* Initialize power management */
6881        radeon_pm_init(rdev);
6882
6883        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6884        ring->ring_obj = NULL;
6885        r600_ring_init(rdev, ring, 1024 * 1024);
6886
6887        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6888        ring->ring_obj = NULL;
6889        r600_ring_init(rdev, ring, 1024 * 1024);
6890
6891        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6892        ring->ring_obj = NULL;
6893        r600_ring_init(rdev, ring, 1024 * 1024);
6894
6895        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6896        ring->ring_obj = NULL;
6897        r600_ring_init(rdev, ring, 64 * 1024);
6898
6899        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6900        ring->ring_obj = NULL;
6901        r600_ring_init(rdev, ring, 64 * 1024);
6902
6903        si_uvd_init(rdev);
6904        si_vce_init(rdev);
6905
6906        rdev->ih.ring_obj = NULL;
6907        r600_ih_ring_init(rdev, 64 * 1024);
6908
6909        r = r600_pcie_gart_init(rdev);
6910        if (r)
6911                return r;
6912
6913        rdev->accel_working = true;
6914        r = si_startup(rdev);
6915        if (r) {
6916                dev_err(rdev->dev, "disabling GPU acceleration\n");
6917                si_cp_fini(rdev);
6918                cayman_dma_fini(rdev);
6919                si_irq_fini(rdev);
6920                sumo_rlc_fini(rdev);
6921                radeon_wb_fini(rdev);
6922                radeon_ib_pool_fini(rdev);
6923                radeon_vm_manager_fini(rdev);
6924                radeon_irq_kms_fini(rdev);
6925                si_pcie_gart_fini(rdev);
6926                rdev->accel_working = false;
6927        }
6928
6929        /* Don't start up if the MC ucode is missing.
6930         * The default clocks and voltages before the MC ucode
6931         * is loaded are not suffient for advanced operations.
6932         */
6933        if (!rdev->mc_fw) {
6934                DRM_ERROR("radeon: MC ucode required for NI+.\n");
6935                return -EINVAL;
6936        }
6937
6938        return 0;
6939}
6940
/* Full driver teardown at unload: stops PM, the CP/DMA engines and
 * gating, tears down interrupts, RLC, writeback, VM manager, IB pool,
 * UVD/VCE, GART and the memory manager, then frees the BIOS copy.
 * The order of these steps matters.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6969
6970/**
6971 * si_get_gpu_clock_counter - return GPU clock counter snapshot
6972 *
6973 * @rdev: radeon_device pointer
6974 *
6975 * Fetches a GPU clock counter snapshot (SI).
6976 * Returns the 64 bit clock counter snapshot.
6977 */
6978uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6979{
6980        uint64_t clock;
6981
6982        mutex_lock(&rdev->gpu_clock_mutex);
6983        WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6984        clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6985                ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6986        mutex_unlock(&rdev->gpu_clock_mutex);
6987        return clock;
6988}
6989
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL) for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 leaves the PLL in bypass)
 * @dclk: requested UVD decode clock (0 leaves the PLL in bypass)
 *
 * Puts the UPLL into bypass, computes feedback/post dividers via
 * radeon_uvd_calc_upll_dividers(), then walks the documented
 * reset/program/settle sequence before switching VCLK/DCLK back to
 * the PLL outputs.  The register write order and delays are part of
 * the hardware programming sequence — do not reorder.
 *
 * Returns 0 on success, negative error code on divider calculation or
 * PLL control handshake failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	/* 125000/250000: min/max ref freq in 10KHz units; dividers bounded
	 * by hardware field widths (fb <= 0x03FFFFFF, post <= 128) */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a VCO range based on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7078
/**
 * si_pcie_gen3_enable - bump the PCIe link to gen2/gen3 speeds
 *
 * @rdev: radeon_device pointer
 *
 * If both the upstream bridge and the GPU support 5.0GT/s or 8.0GT/s,
 * request a link speed change.  For gen3 targets this first runs the
 * equalization retry dance: save bridge/GPU LNKCTL+LNKCTL2 state, force
 * hardware-autonomous-width handling, redo equalization up to 10 times
 * while quiescing the link, then restore the saved config bits.
 * Finally the target link speed is written to LNKCTL2 and a speed change
 * is initiated via PCIE_LC_SPEED_CNTL, polled for completion.
 *
 * Skipped for root-bus devices, IGPs, non-PCIE parts, and when the user
 * disabled it via radeon.pcie_gen2=0.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	/* no upstream bridge to negotiate with */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* user opt-out via module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	/* nothing to gain unless the bridge supports gen2 or gen3 */
	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* config-space accesses below need express capabilities on both ends */
	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD state of both ends to restore later */
			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
						  &bridge_cfg);
			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
						  &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
						   tmp16);

			/* widen the link back up if it trained narrower than
			 * the detected maximum */
			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_DEVSTA,
							  &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* re-snapshot link control state each pass */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &bridge_cfg);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &gpu_cfg);

				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &bridge_cfg2);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				msleep(100);

				/* linkctl: restore saved HAWD bits */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL,
							   tmp16);

				/* linkctl2: restore compliance/margin bits */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (bridge_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (gpu_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				/* release the quiesce for the next attempt */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into LNKCTL2 */
	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and poll until the HW clears the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7265
/**
 * si_program_aspm - configure PCIe Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Programs the link controller, PIF PHY and clock registers for ASPM
 * L0s/L1 support on SI parts.  The disable_* locals are compile-time
 * policy knobs (all currently false, i.e. L0s, L1 and PLL-powerdown-
 * in-L1 are enabled).  CLKREQ-based deep power-down is only enabled
 * when the upstream bridge advertises PCI_EXP_LNKCAP_CLKPM.
 *
 * Skipped entirely when radeon.aspm=0 or for non-PCIE parts.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* power the PHY PLLs fully down in the OFF and TXS2
			 * link states, on both PIF instances */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times (not on Oland/Hainan) */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* Oland/Hainan need a non-zero LS2 exit time */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* deep power-down only if the bridge supports CLKPM */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* run thermal/misc clocks from the bypass
				 * sources so the main PLLs can power down */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS is maxed out and the link is reversed in both
		 * directions, L0s cannot work - turn its inactivity off */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7470
7471static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7472{
7473        unsigned i;
7474
7475        /* make sure VCEPLL_CTLREQ is deasserted */
7476        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7477
7478        mdelay(10);
7479
7480        /* assert UPLL_CTLREQ */
7481        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7482
7483        /* wait for CTLACK and CTLACK2 to get asserted */
7484        for (i = 0; i < 100; ++i) {
7485                uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7486                if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7487                        break;
7488                mdelay(10);
7489        }
7490
7491        /* deassert UPLL_CTLREQ */
7492        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7493
7494        if (i == 100) {
7495                DRM_ERROR("Timeout setting UVD clocks!\n");
7496                return -ETIMEDOUT;
7497        }
7498
7499        return 0;
7500}
7501
/**
 * si_set_vce_clocks - program the VCE PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested VCE video clock (0 leaves the PLL bypassed and asleep)
 * @ecclk: requested VCE core clock (0 leaves the PLL bypassed and asleep)
 *
 * Mirrors the UPLL sequence in si_set_uvd_clocks() for the VCE PLL:
 * bypass, compute dividers via radeon_uvd_calc_upll_dividers(), then
 * the reset/program/settle dance, ending with the EVCLK/ECCLK source
 * muxes switched back to the PLL.  Register order and delays are part
 * of the hardware programming sequence — do not reorder.
 *
 * Returns 0 on success, negative error code on divider calculation or
 * PLL control handshake failure.
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
	int r;

	/* bypass evclk and ecclk with bclk */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
		     ~VCEPLL_BYPASS_EN_MASK);

	if (!evclk || !ecclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
			     ~VCEPLL_SLEEP_MASK);
		return 0;
	}

	/* same divider constraints as the UPLL path */
	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &evclk_div, &ecclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
		     ~VCEPLL_VCO_MODE_MASK);

	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
		     ~VCEPLL_SLEEP_MASK);
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

	/* deassert VCEPLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(1);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* assert VCEPLL_RESET again */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

	/* set PDIV_A and PDIV_B */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* switch EVCLK and ECCLK source selection back to the PLL */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7591